10.1287/mksc.1060.0263

}, abstract = {The cross-product term in moderated regression may be collinear with its constituent parts, making it difficult to detect main, simple, and interaction effects. The literature shows that mean-centering can reduce the covariance between the linear and the interaction terms, thereby suggesting that it reduces collinearity. We analytically prove that mean-centering neither changes the computational precision of parameters, the sampling accuracy of main effects, simple effects, interaction effects, nor the R2. We also show that the determinants of the cross product matrix X' X are identical for uncentered and mean-centered data, so the collinearity problem in the moderated regression is unchanged by mean-centering. Many empirical marketing researchers commonly mean-center their moderated regression data hoping that this will improve the precision of estimates from ill conditioned, collinear data, but unfortunately, this hope is futile. Therefore, researchers using moderated regression models should not mean-center in a specious attempt to mitigate collinearity between the linear and the interaction terms. 
Of course, researchers may wish to mean-center for interpretive purposes and other reasons.}, number = {3}, journal = {{MARKETING} {SCIENCE}}, author = {Echambadi, Raj and Hess, James D.}, month = may, year = {2007}, pages = {438--445}, file = {HighWire Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5TK33WZI/Echambadi and Hess - 2007 - Mean-Centering Does Not Alleviate Collinearity Pro.pdf:application/pdf;HighWire Snapshot:/home/pauljohn/Documents/Zotero/storage/7NIN5943/438.html:text/html} },
@article{mccullagh_sampling_2008,
  title = {Sampling Bias and Logistic Models},
  volume = {70},
  issn = {1369-7412},
  url = {http://www.jstor.org/stable/20203849},
  abstract = {In a regression model, the joint distribution for each finite sample of units is determined by a function $p_{\mathbf{x}}(\mathbf{y})$ depending only on the list of covariate values $\mathbf{x}=(x(u_{1}),\ldots,x(u_{n}))$ on the sampled units. No random sampling of units is involved. In biological work, random sampling is frequently unavoidable, in which case the joint distribution p(y,x) depends on the sampling scheme. Regression models can be used for the study of dependence provided that the conditional distribution $p(y \mid x)$ for random samples agrees with $p_{\mathbf{x}}(\mathbf{y})$ as determined by the regression model for a fixed sample having a non-random configuration x. The paper develops a model that avoids the concept of a fixed population of units, thereby forcing the sampling plan to be incorporated into the sampling distribution. For a quota sample having a predetermined covariate configuration x, the sampling distribution agrees with the standard logistic regression model with correlated components. For most natural sampling plans such as sequential or simple random sampling, the conditional distribution $p(y \mid x)$ is not the same as the regression distribution unless $p_{\mathbf{x}}(\mathbf{y})$ has independent components. In this sense, most natural sampling schemes involving binary random-effects models are biased. The implications of this formulation for subject-specific and population-averaged procedures are explored.},
  number = {4},
  journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)},
  author = {{McCullagh}, Peter},
  year = {2008},
  pages = {643--677}
},
@article{kamenetzky_estimating_1982,
  title = {Estimating Need and Demand for Prehospital Care},
  volume = {30},
  issn = {0030-364X},
  url = {http://www.jstor.org/stable/170204},
  abstract = {Models estimating demand and need for emergency transportation services are developed. These models can provide reliable estimates which can be used for planning purposes, by complementing and/or substituting for historical data. The model estimating demand requires only four independent variables: population in the area, employment in the area, and two indicators of socioeconomic status which can be obtained from census data. The model can be used to estimate demand according to 4 operational categories and 11 clinical categories. The parameters of the model are calibrated with 1979 data from 82 ambulance services covering over 200 minor civil divisions in Southwestern Pennsylvania. This model was tested with data from another 55 minor civil divisions, also in Southwestern Pennsylvania, and it provided good estimates of total demand. The model to estimate need evolves from the demand model. It enables planners to estimate unmet need occurring in the region. The effect of emergency transportation service {(ETS)} provider characteristics on demand was also investigated. 
Statistical tests show that, for purposes of forecasting demand, when the sociodemographic factors are taken into account, provider characteristics are not significant.},
  number = {6},
  journal = {Operations Research},
  author = {Kamenetzky, Ricardo D. and Shuman, Larry J. and Wolfe, Harvey},
  month = nov,
  year = {1982},
  pages = {1148--1167},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RQH5Q2WK/Kamenetzky et al. - 1982 - Estimating Need and Demand for Prehospital Care.pdf:application/pdf}
},
@article{kromrey_mean_1998,
  title = {Mean Centering in Moderated Multiple Regression: Much Ado about Nothing},
  volume = {58},
  shorttitle = {Mean Centering in Moderated Multiple Regression},
  url = {http://epm.sagepub.com/content/58/1/42.abstract},
  doi = {10.1177/0013164498058001005},
  abstract = {Centering variables prior to the analysis of moderated multiple regression equations has been advocated for reasons both statistical (reduction of multicollinearity) and substantive (improved interpretation of the resulting regression equations). This article provides a comparison of centered and raw score analyses in least squares regression. The two methods are demonstrated to be equivalent, yielding identical hypothesis tests associated with the moderation effect and regression equations that are functionally equivalent.},
  number = {1},
  journal = {Educational and Psychological Measurement},
  author = {Kromrey, Jeffrey D. and Foster-Johnson, Lynn},
  month = feb,
  year = {1998},
  pages = {42--67},
  file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/UQRM7J28/42.html:text/html}
},
@article{king_statistical_1988,
  title = {Statistical Models for Political Science Event Counts: Bias in Conventional Procedures and Evidence for the Exponential {Poisson} Regression Model},
  volume = {32},
  issn = {0092-5853},
  shorttitle = {Statistical Models for Political Science Event Counts},
  url = {http://www.jstor.org/stable/2111248},
  doi = {10.2307/2111248},
  abstract = {This paper presents analytical, Monte Carlo, and empirical evidence on models for event count data. Event counts are dependent variables that measure the number of times some event occurs. Counts of international events are probably the most common, but numerous examples exist in every empirical field of the discipline. The results of the analysis below strongly suggest that the way event counts have been analyzed in hundreds of important political science studies have produced statistically and substantively unreliable results. Misspecification, inefficiency, bias, inconsistency, insufficiency, and other problems result from the unknowing application of two common methods that are without theoretical justification or empirical utility in this type of data. I show that the exponential Poisson regression {(EPR)} model provides analytically, in large samples, and empirically, in small, finite samples, a far superior model and optimal estimator. I also demonstrate the advantage of this methodology in an application to nineteenth-century party switching in the {U.S.} Congress. 
Its use by political scientists is strongly encouraged.},
  number = {3},
  journal = {American Journal of Political Science},
  author = {King, Gary},
  year = {1988},
  pages = {838--863},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/G5GEHHWM/King - 1988 - Statistical Models for Political Science Event Cou.pdf:application/pdf}
},
@article{schimek_non-_1997,
  title = {Non- and Semiparametric Alternatives to Generalized Linear Models},
  url = {http://papers.ssrn.com/sol3/papers.cfm?abstract_id=4847},
  abstract = {Additive and Generalized Additive Models {(GAM)} are discussed as completely nonparametric alternatives to Generalized Linear Models {(GLM).} Single Index Models {(SIM)} are reviewed as a means of nonparametrically specifying the link function in {GLMs.} Semiparametric models with a single as well as a multiple nonparametric component are considered in some detail. The penalized least squares technique is compared to Speckman's approach to partial linear models with one unparameterized explanatory variable. Further Generalized Partial Linear Models {(GPLM)} are briefly mentioned. For a multiple nonparametric component a thin plate spline approach and for a dependent vector variable a vector spline approach is discussed.},
  journal = {{SSRN} {eLibrary}},
  author = {Schimek, Michael G.},
  month = mar,
  year = {1997},
  file = {SSRN Snapshot:/home/pauljohn/Documents/Zotero/storage/MD8BAX8M/papers.html:text/html}
},
@article{newton_bayesian_1996,
  title = {{Bayesian} Inference for Semiparametric Binary Regression},
  volume = {91},
  issn = {0162-1459},
  url = {http://www.jstor.org/stable/2291390},
  doi = {10.2307/2291390},
  abstract = {We propose a regression model for binary response data that places no structural restrictions on the link function except monotonicity and known location and scale. Predictors enter linearly. We demonstrate Bayesian inference calculations in this model. By modifying the Dirichlet process, we obtain a natural prior measure over this semiparametric model, and we use Polya sequence theory to formulate this measure in terms of a finite number of unobserved variables. We design a Markov chain Monte Carlo algorithm for posterior simulation and apply the methodology to data on radiotherapy treatments for cancer.},
  number = {433},
  journal = {Journal of the American Statistical Association},
  author = {Newton, Michael A. and Czado, Claudia and Chappell, Rick},
  month = mar,
  year = {1996},
  pages = {142--153}
},
@article{hubbard_why_2008,
  title = {Why P Values Are Not a Useful Measure of Evidence in Statistical Significance Testing},
  volume = {18},
  url = {http://tap.sagepub.com/content/18/1/69.abstract},
  doi = {10.1177/0959354307086923},
  abstract = {Reporting p values from statistical significance tests is common in psychology's empirical literature. Sir Ronald Fisher saw the p value as playing a useful role in knowledge development by acting as an `objective' measure of inductive evidence against the null hypothesis. We review several reasons why the p value is an unobjective and inadequate measure of evidence when statistically testing hypotheses. A common theme throughout many of these reasons is that p values exaggerate the evidence against H0 . This, in turn, calls into question the validity of much published work based on comparatively small, including .05, p values. Indeed, if researchers were fully informed about the limitations of the p value as a measure of evidence, this inferential index could not possibly enjoy its ongoing ubiquity. Replication with extension research focusing on sample statistics, effect sizes, and their confidence intervals is a better vehicle for reliable knowledge development than using p values. Fisher would also have agreed with the need for replication research.},
  number = {1},
  journal = {Theory \& Psychology},
  author = {Hubbard, Raymond and Lindsay, R. 
Murray},
  month = feb,
  year = {2008},
  pages = {69--88},
  file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/APDBST3M/69.html:text/html}
},
@article{groemping_relative_2006,
  title = {Relative Importance for Linear Regression in {R}: The Package relaimpo},
  volume = {17},
  issn = {1548-7660},
  url = {http://www.jstatsoft.org/v17/i01},
  number = {1},
  journal = {Journal of Statistical Software},
  author = {Groemping, Ulrike},
  year = {2006},
  pages = {1--27}
},
@article{gelman_difference_2006,
  title = {The Difference Between {``Significant''} and {``Not Significant''} is not Itself Statistically Significant},
  volume = {60},
  number = {4},
  issn = {0003-1305, 1537-2731},
  url = {http://pubs.amstat.org/doi/abs/10.1198/000313006X152649},
  doi = {10.1198/000313006X152649},
  journal = {The American Statistician},
  author = {Gelman, Andrew and Stern, Hal},
  month = nov,
  year = {2006},
  pages = {328--331},
  file = {American Statistical Association - The American Statistician - 60(4):328:/home/pauljohn/Documents/Zotero/storage/CG8APAT7/000313006X152649.html:text/html}
},
@article{marra_practical_2011,
  title = {Practical variable selection for generalized additive models},
  volume = {55},
  issn = {0167-9473},
  url = {http://www.sciencedirect.com/science/article/pii/S0167947311000491},
  doi = {10.1016/j.csda.2011.02.004},
  abstract = {The problem of variable selection within the class of generalized additive models, when there are many covariates to choose from but the number of predictors is still somewhat smaller than the number of observations, is considered. Two very simple but effective shrinkage methods and an extension of the nonnegative garrote estimator are introduced. The proposals avoid having to use nonparametric testing methods for which there is no general reliable distributional theory. Moreover, component selection is carried out in one single step as opposed to many selection procedures which involve an exhaustive search of all possible models. 
The empirical performance of the proposed methods is compared to that of some available techniques via an extensive simulation study. The results show under which conditions one method can be preferred over another, hence providing applied researchers with some practical guidelines. The procedures are also illustrated analysing data on plasma beta-carotene levels from a cross-sectional study conducted in the United States.},
  number = {7},
  journal = {Computational Statistics \& Data Analysis},
  author = {Marra, Giampiero and Wood, Simon N.},
  month = jul,
  year = {2011},
  keywords = {Generalized additive model, Nonnegative garrote estimator, Penalized thin plate regression spline, Practical variable selection, Shrinkage smoother},
  pages = {2372--2387},
  file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/T4UATDVG/Marra and Wood - 2011 - Practical variable selection for generalized addit.pdf:application/pdf;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/REX6RWBB/S0167947311000491.html:text/html}
},
@article{white_heteroskedasticity-consistent_1980,
  title = {A Heteroskedasticity-Consistent Covariance Matrix Estimator and a Direct Test for Heteroskedasticity},
  volume = {48},
  issn = {0012-9682},
  url = {http://www.jstor.org/stable/1912934},
  doi = {10.2307/1912934},
  abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.},
  number = {4},
  journal = {Econometrica},
  author = {White, Halbert},
  month = may,
  year = {1980},
  pages = {817--838},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5URHUFUJ/White - 1980 - A Heteroskedasticity-Consistent Covariance Matrix .pdf:application/pdf}
},
@book{miles_applying_2000,
  edition = {First},
  title = {Applying Regression and Correlation: A Guide for Students and Researchers},
  isbn = {0761962301},
  shorttitle = {Applying Regression and Correlation},
  publisher = {Sage Publications Ltd},
  author = {Miles, Jeremy and Shevlin, Mark},
  month = nov,
  year = {2000}
},
@article{mood_logistic_2010,
  title = {Logistic Regression: Why We Cannot Do What We Think We Can Do, and What We Can Do About It},
  volume = {26},
  shorttitle = {Logistic Regression},
  url = {http://esr.oxfordjournals.org/content/26/1/67.abstract},
  doi = {10.1093/esr/jcp006},
  abstract = {Logistic regression estimates do not behave like linear regression estimates in one important respect: They are affected by omitted variables, even when these variables are unrelated to the independent variables in the model. This fact has important implications that have gone largely unnoticed by sociologists. Importantly, we cannot straightforwardly interpret log-odds ratios or odds ratios as effect measures, because they also reflect the degree of unobserved heterogeneity in the model. In addition, we cannot compare log-odds ratios or odds ratios for similar models across groups, samples, or time points, or across models with different independent variables in a sample. 
This article discusses these problems and possible ways of overcoming them.},
  number = {1},
  journal = {European Sociological Review},
  author = {Mood, Carina},
  month = feb,
  year = {2010},
  pages = {67--82},
  file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/G8WB82TK/67.html:text/html}
},
@article{caffo_flexible_2007,
  title = {Flexible random intercept models for binary outcomes using mixtures of normals},
  volume = {51},
  issn = {0167-9473},
  url = {http://www.sciencedirect.com/science/article/pii/S0167947306003574},
  doi = {10.1016/j.csda.2006.09.031},
  abstract = {Random intercept models for binary data are useful tools for addressing between-subject heterogeneity. Unlike linear models, the non-linearity of link functions used for binary data force a distinction between marginal and conditional interpretations. This distinction is blurred in probit models with a normally distributed random intercept because the resulting model implies a probit marginal link as well. That is, this model is closed in the sense that the distribution associated with the marginal and conditional link functions and the random effect distribution are all of the same family. It is shown that the closure property is also attained when the distributions associated with the conditional and marginal link functions and the random effect distribution are mixtures of normals. The resulting flexible family of models is demonstrated to be related to several others present in the literature and can be used to synthesize several seemingly disparate modeling approaches. In addition, this family of models offers considerable computational benefits. A diverse series of examples is explored that illustrates the wide applicability of this approach.},
  number = {11},
  journal = {Computational Statistics \& Data Analysis},
  author = {Caffo, Brian and An, Ming-Wen and Rohde, Charles},
  month = jul,
  year = {2007},
  keywords = {Logit-normal, Marginalized multilevel models, Probit-normal},
  pages = {5220--5235},
  file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/AVR94VNE/science.html:text/html;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/T8STDIDI/S0167947306003574.html:text/html}
},
@article{liang_variable_2009,
  title = {Variable Selection for Partially Linear Models with Measurement Errors},
  volume = {104},
  issn = {0162-1459},
  doi = {10.1198/jasa.2009.0127},
  abstract = {This article focuses on variable selection for partially linear models when the covariates are measured with additive errors. We propose two classes of variable selection procedures, penalized least squares and penalized quantile regression, using the nonconvex penalized principle. The first procedure corrects the bias in the loss function caused by the measurement error by applying the so-called correction-for-attenuation approach, whereas the second procedure corrects the bias by using orthogonal regression. The sampling properties for the two procedures are investigated. The rate of convergence and the asymptotic normality of the resulting estimates are established. We further demonstrate that, with proper choices of the penalty functions and the regularization parameter, the resulting estimates perform asymptotically as well as an oracle procedure (). Choice of smoothing parameters is also discussed. Finite sample performance of the proposed variable selection procedures is assessed by Monte Carlo simulation studies. 
We further illustrate the proposed procedures by an application.},
  number = {485},
  journal = {Journal of the American Statistical Association},
  author = {Liang, Hua and Li, Runze},
  year = {2009},
  note = {{PMID:} 20046976 {PMCID:} {PMC2697854}},
  pages = {234--248},
  file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/GT4MBREP/LIANG and LI - 2009 - Variable Selection for Partially Linear Models wit.pdf:application/pdf}
},
@book{cohen_applied_2002,
  edition = {Third},
  title = {Applied Multiple {Regression/Correlation} Analysis for the Behavioral Sciences},
  isbn = {0805822232},
  publisher = {Routledge Academic},
  author = {Cohen, Jacob and Cohen, Patricia and West, Stephen G. and Aiken, Leona S.},
  month = aug,
  year = {2002}
},
@article{coull_random_2000,
  title = {Random Effects Modeling of Multiple Binomial Responses Using the Multivariate Binomial Logit-Normal Distribution},
  volume = {56},
  issn = {0006-341X},
  url = {http://www.jstor.org/stable/2677105},
  abstract = {The multivariate binomial logit-normal distribution is a mixture distribution for which, (i) conditional on a set of success probabilities and sample size indices, a vector of counts is independent binomial variates, and (ii) the vector of logits of the parameters has a multivariate normal distribution. We use this distribution to model multivariate binomial-type responses using a vector of random effects. The vector of logits of parameters has a mean that is a linear function of explanatory variables and has an unspecified or partly specified covariance matrix. The model generalizes and provides greater flexibility than the univariate model that uses a normal random effect to account for positive correlations in clustered data. The multivariate model is useful when different elements of the response vector refer to different characteristics, each of which may naturally have its own random effect. It is also useful for repeated binary measurement of a single response when there is a nonexchangeable association structure, such as one often expects with longitudinal data or when negative association exists for at least one pair of responses. We apply the model to an influenza study with repeated responses in which some pairs are negatively associated and to a developmental toxicity study with continuation-ratio logits applied to an ordinal response with clustered observations.},
  number = {1},
  journal = {Biometrics},
  author = {Coull, Brent A. and Agresti, Alan},
  month = mar,
  year = {2000},
  pages = {73--80},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TWDKDX3F/Coull and Agresti - 2000 - Random Effects Modeling of Multiple Binomial Respo.pdf:application/pdf}
},
@article{rabe-hesketh_parameterization_2001,
  title = {Parameterization of multivariate random effects models for categorical data},
  volume = {57},
  issn = {0006-341X},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/11764269},
  abstract = {Alternative parameterizations and problems of identification and estimation of multivariate random effects models for categorical responses are investigated. The issues are illustrated in the context of the multivariate binomial logit-normal {(BLN)} model introduced by Coull and Agresti (2000, Biometrics 56, 73-80). We demonstrate that the {BLN} model is poorly identified unless proper restrictions are imposed on the parameters. Moreover, estimation of {BLN} models is unduly computationally complex. In the first application considered by Coull and Agresti, an identification problem results in highly unstable, highly correlated parameter estimates and large standard errors. A probit-normal version of the specified {BLN} model is demonstrated to be underidentified, whereas the {BLN} model is empirically underidentified. Identification can be achieved by constraining one of the parameters. 
We show that a one-factor probit model is equivalent to the probit version of the specified {BLN} model and that a one-factor logit model is empirically equivalent to the {BLN} model. Estimation is greatly simplified by using a factor model.},
  number = {4},
  journal = {Biometrics},
  author = {Rabe-Hesketh, S and Skrondal, A},
  month = dec,
  year = {2001},
  keywords = {Biometry, Data Interpretation, Statistical, Disease Outbreaks, Humans, Influenza, Human, Models, Statistical, Multivariate analysis},
  pages = {1256--1264}
},
@article{mulder_multidimensional_2008,
  title = {Multidimensional Adaptive Testing with Optimal Design Criteria for Item Selection},
  volume = {74},
  number = {2},
  issn = {0033-3123, 1860-0980},
  url = {http://www.springerlink.com/content/31110wh62u57v179/},
  doi = {10.1007/s11336-008-9097-5},
  journal = {Psychometrika},
  author = {Mulder, Joris and van der Linden, Wim J.},
  month = dec,
  year = {2008},
  pages = {273--296},
  file = {SpringerLink - Psychometrika, Volume 74, Number 2:/home/pauljohn/Documents/Zotero/storage/G8A3C4JE/31110wh62u57v179.html:text/html}
},
@article{colombi_multivariate_1998,
  title = {A multivariate logit model with marginal canonical association},
  volume = {27},
  number = {12},
  issn = {0361-0926, 1532-415X},
  url = {http://www.tandfonline.com/doi/abs/10.1080/03610929808832266},
  doi = {10.1080/03610929808832266},
  journal = {Communications in Statistics - Theory and Methods},
  author = {Colombi, Roberto},
  month = jan,
  year = {1998},
  pages = {2953--2971},
  file = {Taylor & Francis Online :: A multivariate logit model with marginal canonical association - Communications in Statistics - Theory and Methods - Volume 27, Issue 12:/home/pauljohn/Documents/Zotero/storage/BUNZ6VQQ/03610929808832266.html:text/html}
},
@article{bergsma_marginal_2002,
  title = {Marginal models for categorical data},
  volume = {30},
  issn = {0090-5364},
  url = {http://projecteuclid.org/euclid.aos/1015362188},
  doi = {10.1214/aos/1015362188},
  abstract = {Statistical models defined by imposing restrictions on marginal distributions of contingency tables have received considerable attention recently. This paper introduces a general definition of marginal log-linear parameters and describes conditions for a marginal log-linear parameter to be a smooth parameterization of the distribution and to be variation independent. Statistical models defined by imposing affine restrictions on the marginal log-linear parameters are investigated. These models generalize ordinary log-linear and multivariate logistic models. Sufficient conditions for a log-affine marginal model to be nonempty and to be a curved exponential family are given. Standard large-sample theory is shown to apply to maximum likelihood estimation of log-affine marginal models for a variety of sampling procedures.},
  number = {1},
  journal = {The Annals of Statistics},
  author = {Bergsma, Wicher P. and Rudas, Tam{\'a}s},
  month = feb,
  year = {2002},
  pages = {140--159},
  file = {Euclid Project PDF:/home/pauljohn/Documents/Zotero/storage/X56EJQQE/DPubS.html:text/html}
},
@article{heagerty_marginal_1996,
  title = {Marginal Regression Models for Clustered Ordinal Measurements},
  volume = {91},
  issn = {0162-1459},
  url = {http://www.jstor.org/stable/2291722},
  doi = {10.2307/2291722},
  abstract = {This article constructs statistical models for clustered ordinal measurements. We specify two regression models: one for the marginal means and one for the marginal pairwise global odds ratios. Of particular interest are problems in which the odds ratio regression is a focus. Simple assumptions about higher-order conditional moments give a quadratic exponential likelihood function with second-order estimating equations {(GEE2)} as score equations. But computational difficulty can arise for large clusters when both the mean response and the association between measures is of interest. First, we present {GEE1} as an alternative estimation strategy. Second, we extend to repeated ordinal measurements the method developed by Carey et al. 
for binary observations that is based on alternating logistic regressions {(ALR)} for the marginal mean parameters and the pairwise log-odds ratio parameters. We study the efficiency of {GEE1} and {ALR} relative to full maximum likelihood. We demonstrate the utility of our regression methods for ordinal data by applying the methods to a surgical follow-up study.},
  number = {435},
  journal = {Journal of the American Statistical Association},
  author = {Heagerty, Patrick J. and Zeger, Scott L.},
  year = {1996},
  pages = {1024--1036},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TH8MGWAE/Heagerty and Zeger - 1996 - Marginal Regression Models for Clustered Ordinal M.pdf:application/pdf}
},
@article{ekholm_marginal_1995,
  title = {Marginal regression analysis of a multivariate binary response},
  volume = {82},
  url = {http://biomet.oxfordjournals.org/content/82/4/847.abstract},
  doi = {10.1093/biomet/82.4.847},
  abstract = {We propose the use of the mean parameter for regression analysis of a multivariate binary response. We model the association using dependence ratios defined in terms of the mean parameter, the components of which are the joint success probabilities of all orders. This permits flexible modelling of higher-order associations, using maximum likelihood estimation. We reanalyse two data sets, one with variable cluster size and the other a longitudinal data set with constant cluster size.},
  number = {4},
  journal = {Biometrika},
  author = {Ekholm, Anders and Smith, Peter W. F. and {McDonald}, John W.},
  month = dec,
  year = {1995},
  pages = {847--854},
  file = {Snapshot:/home/pauljohn/Documents/Zotero/storage/56QPVNHP/847.html:text/html}
},
@article{ekholm_association_2000,
  title = {Association Models for a Multivariate Binary Response},
  volume = {56},
  issn = {0006-341X},
  url = {http://eprints.ioe.ac.uk/5446/},
  doi = {10.1111/j.0006-341X.2000.00712.x},
  journal = {Biometrics},
  author = {Ekholm, Anders and {McDonald}, John W. and Smith, Peter W. F.},
  month = sep,
  year = {2000},
  pages = {712--718},
  file = {Association models for a multivariate binary response - IOE Eprints:/home/pauljohn/Documents/Zotero/storage/E5XT8CXP/5446.html:text/html}
},
@article{ananth_modeling_2004,
  title = {Modeling Multivariate Binary Responses with Multiple Levels of Nesting Based on Alternating Logistic Regressions: an Application to Caries Aggregation},
  volume = {83},
  shorttitle = {Modeling Multivariate Binary Responses with Multiple Levels of Nesting Based on Alternating Logistic Regressions},
  url = {http://jdr.sagepub.com/content/83/10/776.abstract},
  doi = {10.1177/154405910408301008},
  abstract = {Clustered binary responses are commonly encountered in dental research. Data analysis may include modeling both the marginal response probabilities (i.e., risk) and the dependence structure between pairs of responses (i.e., aggregation). While second-order generalized estimating equations {(GEE2)} is a well-known approach for such data, alternating logistic regressions {(ALR)} is a computationally efficient alternative method, especially for large clusters. We illustrate {ALR} with an application to caries aggregation using a dataset with 3 levels of nesting: tooth surfaces within an interproximal {(IP)} region, {IP} regions within a jaw, and jaws within a subject. Caries lesions appear to aggregate strongly within subjects with a spatially distributed risk. 
The minimum within-{IP-region} odds ratio {(OR)} was 2.25 (95\% confidence interval 1.15, 4.41), and the within-{IP-region} {ORs} were always greater than the between-{IP-region} {ORs.} {ALR} is a convenient and useful regression technique for explicit modeling of the dependence structure, and may be applicable to other dental research problems involving clustered or nested responses.}, number = {10}, journal = {Journal of Dental Research}, author = {Ananth, {C.V.} and Kantor, {M.L.}}, month = oct, year = {2004}, pages = {776 --781}, file = {Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TN5TMPRG/Ananth and Kantor - 2004 - Modeling Multivariate Binary Responses with Multip.pdf:application/pdf;Snapshot:/home/pauljohn/Documents/Zotero/storage/AI5DJWRJ/776.html:text/html} }, @article{prentice_correlated_1988, title = {Correlated Binary Regression with Covariates Specific to Each Binary Observation}, volume = {44}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/2531733}, doi = {10.2307/2531733}, abstract = {Regression methods are considered for the analysis of correlated binary data when each binary observation may have its own covariates. It is argued that binary response models that condition on some or all binary responses in a given "block" are useful for studying certain types of dependencies, but not for the estimation of marginal response probabilities or pairwise correlations. Fully parametric approaches to these latter problems appear to be unduly complicated except in such special cases as the analysis of paired binary data. Hence, a generalized estimating equation approach is advocated for inference on response probabilities and correlations. 
Illustrations involving both small and large block sizes are provided.},
  number = {4},
  journal = {Biometrics},
  author = {Prentice, Ross L.},
  month = dec,
  year = {1988},
  pages = {1033--1048},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/A9CGJTAI/Prentice - 1988 - Correlated Binary Regression with Covariates Speci.pdf:application/pdf}
},

@article{basu_marginal_2003,
  title = {Marginal Likelihood and Bayes Factors for Dirichlet Process Mixture Models},
  volume = {98},
  issn = {0162-1459},
  url = {http://www.jstor.org/stable/30045209},
  abstract = {We present a method for comparing semiparametric Bayesian models, constructed under the Dirichlet process mixture {(DPM)} framework, with alternative semiparametric or parametric Bayesian models. A distinctive feature of the method is that it can be applied to semiparametric models containing covariates and hierarchical prior structures, and is apparently the first method of its kind. Formally, the method is based on the marginal likelihood estimation approach of Chib (1995) and requires estimation of the likelihood and posterior ordinates of the {DPM} model at a single high-density point. An interesting computation is involved in the estimation of the likelihood ordinate, which is devised via collapsed sequential importance sampling. 
Extensive experiments with synthetic and real data involving semiparametric binary data regression models and hierarchical longitudinal mixed-effects models are used to illustrate the implementation, performance, and applicability of the method.},
  number = {461},
  journal = {Journal of the American Statistical Association},
  author = {Basu, Sanjib and Chib, Siddhartha},
  month = mar,
  year = {2003},
  pages = {224--235},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/69A8TPVD/Basu and Chib - 2003 - Marginal Likelihood and Bayes Factors for Dirichle.pdf:application/pdf}
},

@article{corcoran_exact_2001,
  title = {An Exact Trend Test for Correlated Binary Data},
  volume = {57},
  issn = {0006-341X},
  url = {http://www.jstor.org/stable/3068436},
  abstract = {The problem of testing a dose-response relationship in the presence of exchangeably correlated binary data has been addressed using a variety of models. Most commonly used approaches are derived from likelihood or generalized estimating equations and rely on large-sample theory to justify their inferences. However, while earlier work has determined that these methods may perform poorly for small or sparse samples, there are few alternatives available to those faced with such data. We propose an exact trend test for exchangeably correlated binary data when groups of correlated observations are ordered. This exact approach is based on an exponential model derived by Molenberghs and Ryan (1999) and Ryan and Molenberghs (1999) and provides natural analogues to Fisher's exact test and the binomial trend test when the data are correlated. 
We use a graphical method with which one can efficiently compute the exact tail distribution and apply the test to two examples.},
  number = {3},
  journal = {Biometrics},
  author = {Corcoran, Chris and Ryan, Louise and Senchaudhuri, Pralay and Mehta, Cyrus and Patel, Nitin and Molenberghs, Geert},
  year = {2001},
  pages = {941--948},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/4AMHAWRA/Corcoran et al. - 2001 - An Exact Trend Test for Correlated Binary Data.pdf:application/pdf}
},

@article{webster_patents_2011,
  title = {Do Patents Matter for Commercialization?},
  volume = {54},
  issn = {0022-2186},
  url = {http://www.jstor.org/stable/10.1086/658487},
  abstract = {This paper estimates the effect of a patent grant on the likelihood that an invention will progress to different commercialization stages, using survey data on 3,162 inventions that were the subject of a patent application. We find that about 40 percent of all inventions advanced to the point of market launch and mass production. Although a patent grant had no effect on the decision to proceed with the commercialization process, being refused a patent reduced the probability of attempting market launch and mass production by about 13 percentage points. 
Over and above this, having protection from several other complementary patents increased the probability of commercialization by an additional 3–5 percentage points.},
  number = {2},
  journal = {Journal of Law and Economics},
  author = {Webster, Elizabeth and Jensen, Paul H.},
  month = may,
  year = {2011},
  pages = {431--453},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TC2W6QK3/Webster and Jensen - 2011 - Do Patents Matter for Commercialization.pdf:application/pdf}
},

@article{ge_womens_2011,
  title = {Women’s College Decisions: How Much Does Marriage Matter?},
  volume = {29},
  issn = {0734-306X},
  shorttitle = {Women’s College Decisions},
  url = {http://www.jstor.org/stable/10.1086/660774},
  abstract = {This article investigates the sequential college attendance decision of young women and quantifies the effect of marriage expectations on their decision to attend and graduate from college. A dynamic choice model of college attendance, labor supply, and marriage is formulated and structurally estimated using panel data from the {NLSY79.} The model is used to simulate the effects of no marriage benefits and finds that the predicted college enrollment rate will drop from 58.0\% to 50.5\%. 
Using the estimated model, the college attendance behavior for a younger cohort from the {NLSY97} is predicted and used to validate the behavioral model.},
  number = {4},
  journal = {Journal of Labor Economics},
  author = {Ge, Suqin},
  month = oct,
  year = {2011},
  pages = {773--818},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/FCBSA973/Ge - 2011 - Women’s College Decisions How Much Does Marriage .pdf:application/pdf}
},

@article{damon_j._phillips_jazz_2011,
  title = {Jazz and the Disconnected: City Structural Disconnectedness and the Emergence of a Jazz Canon, 1897–1933},
  volume = {117},
  issn = {0002-9602},
  shorttitle = {Jazz and the Disconnected},
  url = {http://www.jstor.org/stable/10.1086/661757},
  abstract = {The study of organizations and markets suffers from the underdevelopment of disconnected producers. This article emphasizes the imputed identities of sources to argue that difficult-to-categorize outputs were appealing when associated with a source high in disconnectedness. Worldwide data on recordings and mobility with detailed data on Midwest recordings provide evidence that jazz from cities high in disconnectedness was rerecorded more often by musicians over time. Moreover, recordings with difficult-to-categorize elements were more likely to be rerecorded when coming from cities high in disconnectedness, despite evidence that original music was paradoxically less likely to come from these cities.},
  number = {2},
  journal = {American Journal of Sociology},
  author = {Phillips, Damon J.},
  month = sep,
  year = {2011},
  annote = {{ArticleType:} research-article / Full publication date: September 2011 / Copyright © 2011 The University of Chicago Press},
  pages = {420--483},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/26DTPMIQ/Damon J. 
Phillips - 2011 - Jazz and the Disconnected City Structural Disconn.pdf:application/pdf}
},

@article{koenig_variable_2011,
  title = {Variable Helper Effects, Ecological Conditions, and the Evolution of Cooperative Breeding in the Acorn Woodpecker},
  volume = {178},
  issn = {0003-0147},
  url = {http://www.jstor.org/stable/10.1086/660832},
  abstract = {The ecological conditions leading to delayed dispersal and helping behavior are generally thought to follow one of two contrasting scenarios: that conditions are stable and predictable, resulting in young being ecologically forced to remain as helpers (extrinsic constraints and the habitat saturation hypothesis), or that conditions are highly variable and unpredictable, leading to the need for helpers to raise young, at least when conditions are poor (intrinsic constraints and the hard life hypothesis). We investigated how variability in ecological conditions influences the degree to which helpers augment breeder fitness in the cooperatively breeding acorn woodpecker {(Melanerpes} formicivorus), a species in which the acorn crop, territory quality, and prior breeding experience all vary in ways that have important effects on fitness. We found that the relationship between ecological conditions and the probability that birds would remain as helpers was variable but that helpers generally yielded greater fitness benefits when ecological conditions were favorable, rather than unfavorable, for breeding. These results affirm the importance of extrinsic constraints to delayed dispersal and cooperative breeding in this species, despite its dependence on a highly variable and unpredictable acorn crop. Our findings also confirm that helpers can have very different fitness effects, depending on conditions, but that those effects are not necessarily greater when breeding conditions are unfavorable.},
  number = {2},
  journal = {The American Naturalist},
  author = {Koenig, Walter D. and Walters, Eric L. 
and Haydock, Joseph},
  year = {2011},
  pages = {145--158},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/64M4ZCGX/Koenig et al. - 2011 - Variable Helper Effects, Ecological Conditions, an.pdf:application/pdf}
},

@article{hinkkanen_sentencing_2011,
  title = {Sentencing Theory, Policy, and Research in the Nordic Countries},
  volume = {40},
  issn = {0192-3234},
  url = {http://www.jstor.org/stable/10.1086/661182},
  abstract = {Sentencing in the Nordic countries follows the civil law tradition. Sentencing councils and advisory boards have no role, as the guidance for the courts is given mainly in the form of legislative sentencing principles and court precedents. Legislative sentencing guidance has undergone changes both in form and substance. The 1970s decline of the rehabilitative ideal was mirrored in sentencing reforms that emphasized proportionality and predictability in sentencing. This was the case especially in Finland and Sweden. But unlike in many other countries, these ideological changes did not result to increased severity—for Finland the result was the opposite. While the Nordic legislators have shown increased interest in more detailed guidance of sentencing, courts are still left with a wide range of sentencing options and fairly broad penal latitudes. At the same time the sentencing systems place high value in consistency and uniformity in sentencing. To reconcile these aims, further devices are needed for the structuring of the sentencing decision, including legal-theoretical analyses and empirical research. 
The paper discusses the latter point in more detail with more recent research examples from Finland.},
  number = {1},
  journal = {Crime and Justice},
  author = {Hinkkanen, Ville and Lappi-Seppälä, Tapio},
  year = {2011},
  pages = {349--404},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/K9HUDUFS/Hinkkanen and Lappi-Seppälä - 2011 - Sentencing Theory, Policy, and Research in the Nor.pdf:application/pdf}
},

@article{delavande_criminal_2010,
  title = {Criminal Prosecution and Human Immunodeficiency {Virus–Related} Risky Behavior},
  volume = {53},
  issn = {0022-2186},
  url = {http://www.jstor.org/stable/10.1086/655806},
  abstract = {We examine the consequences of prosecuting people who are human immunodeficiency virus {(HIV)} positive and expose others to the infection. We show that the effect of such prosecutions on the spread of {HIV} is a priori ambiguous. The prosecutions deter unsafe sex. However, they also create incentives for having sex with partners who are more promiscuous, which consequently increases the spread of {HIV.} We test these predictions and find that such prosecutions are associated with a reduction in the number of partners, an increase in safe sex, and an increase in sex with prostitutes. We estimate that doubling the prosecution rate could decrease the total number of new {HIV} infections by one-third over a 10-year period.},
  number = {4},
  journal = {Journal of Law and Economics},
  author = {Delavande, Adeline and Goldman, Dana and Sood, Neeraj},
  month = nov,
  year = {2010},
  annote = {{ArticleType:} research-article / Full publication date: November 2010 / Copyright © 2010 The University of Chicago},
  pages = {741--782},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UUICACJV/Delavande et al. 
- 2010 - Criminal Prosecution and Human Immunodeficiency Vi.pdf:application/pdf}
},

@article{freedman_endogeneity_2010,
  title = {Endogeneity in Probit Response Models},
  volume = {18},
  issn = {1047-1987},
  url = {http://www.jstor.org/stable/25792001},
  abstract = {We look at conventional methods for removing endogeneity bias in regression models, including the linear model and the probit model. It is known that the usual Heckman two-step procedure should not be used in the probit model: from a theoretical perspective, it is unsatisfactory, and likelihood methods are superior. However, serious numerical problems occur when standard software packages try to maximize the biprobit likelihood function, even if the number of covariates is small. We draw conclusions for statistical practice. Finally, we prove the conditions under which parameters in the model are identifiable. The conditions for identification are delicate; we believe these results are new.},
  number = {2},
  journal = {Political Analysis},
  author = {Freedman, David A. and Sekhon, Jasjeet S.},
  month = apr,
  year = {2010},
  annote = {{ArticleType:} research-article / Full publication date: Spring 2010 / Copyright © 2010 Society for Political Methodology},
  pages = {138--150}
},

@article{fiocco_new_2009,
  title = {A new serially correlated gamma-frailty process for longitudinal count data},
  volume = {10},
  url = {http://biostatistics.oxfordjournals.org/content/10/2/245.abstract},
  doi = {10.1093/biostatistics/kxn031},
  abstract = {We describe a new multivariate gamma distribution and discuss its implication in a Poisson-correlated gamma-frailty model. This model is introduced to account for between-subjects correlation occurring in longitudinal count data. For likelihood-based inference involving distributions in which high-dimensional dependencies are present, it may be useful to approximate likelihoods based on the univariate or bivariate marginal distributions. 
The merit of composite likelihood is to reduce the computational complexity of the full likelihood. A 2-stage composite-likelihood procedure is developed for estimating the model parameters. The suggested method is applied to a meta-analysis study for survival curves.},
  number = {2},
  journal = {Biostatistics},
  author = {Fiocco, M. and Putter, H. and van Houwelingen, J. C.},
  month = apr,
  year = {2009},
  pages = {245--257},
  file = {Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TJJ8UQ5Q/Fiocco et al. - 2009 - A new serially correlated gamma-frailty process fo.pdf:application/pdf;Snapshot:/home/pauljohn/Documents/Zotero/storage/MTASXIF8/245.html:text/html}
},

@article{behncke_unemployed_2010,
  title = {Unemployed and Their Caseworkers: Should They Be Friends or Foes?},
  volume = {173},
  issn = {0964-1998},
  shorttitle = {Unemployed and Their Caseworkers},
  url = {http://www.jstor.org/stable/20622579},
  abstract = {In many countries, caseworkers in public employment offices have dual roles of counselling and monitoring unemployed people. These roles often conflict, which results in important caseworker heterogeneity: some consider providing services to their clients and satisfying their demands as their primary task. However, others may pursue their own strategies, even against the will of the unemployed person. They may assign jobs and labour market programmes without the consent of the unemployed person. On the basis of a very detailed linked jobseeker—caseworker data set for Switzerland, we investigate the effects of caseworkers' co-operativeness on the probabilities of employment of their clients. Modified statistical matching methods reveal that caseworkers who place less emphasis on a co-operative and harmonic relationship with their clients increase their chances of employment in the short and medium term.},
  number = {1},
  journal = {Journal of the Royal Statistical Society. 
Series A {(Statistics} in Society)},
  author = {Behncke, Stefanie and Frölich, Markus and Lechner, Michael},
  month = jan,
  year = {2010},
  pages = {67--92}
},

@article{zelner_using_2009,
  title = {Using Simulation to Interpret Results from Logit, Probit, and Other Nonlinear Models},
  volume = {30},
  issn = {0143-2095},
  url = {http://www.jstor.org/stable/27735494},
  abstract = {In a recent issue of this journal, Glenn Hoetker proposes that researchers improve the interpretation and presentation of logit and probit results by reporting the marginal effects of key independent variables at theoretically interesting or empirically relevant values of the other independent variables in the model, and also by presenting results graphically {(Hoetker}, 2007: 335, 337). In this research note, I suggest an alternative approach for achieving this objective: reporting differences in predicted probabilities associated with discrete changes in key independent variable values. This intuitive approach to interpretation is especially useful when the theoretically interesting or empirically relevant changes in independent variables values are not very small, and also for models that contain interaction terms (or higher-order terms such as quadratics). Although the graphical presentations recommended by Hoetker implicitly embody this approach, they typically fail to include appropriate measures of statistical significance, and may therefore lead to erroneous conclusions. In order to calculate such measures, I recommend and demonstrate an intuitive simulation-based approach to statistical interpretation, developed by King et al. (2000), that has gained widespread adherence in the field of political science. 
Throughout the article, I provide a running example based on research that has previously appeared in the Strategic Management Journal.},
  number = {12},
  journal = {Strategic Management Journal},
  author = {Zelner, Bennet A.},
  month = dec,
  year = {2009},
  pages = {1335--1348}
},

@article{gelabert_does_2009,
  title = {Does the Effect of Public Support for {R\&D} Depend on the Degree of Appropriability?},
  volume = {57},
  issn = {0022-1821},
  url = {http://www.jstor.org/stable/27750733},
  abstract = {We explore the interaction between public support for {R\&D} and appropriability using a dataset constructed from the Spanish Community Innovation Survey, for the period 2000–2005. We find that public support policy is less able to stimulate privately financed internal {R\&D} in firms where appropriability mechanisms are more effective. On average, the effect of public support for {R\&D} is three times larger for those firms reporting a level of appropriability below the median vis-à-vis those firms for which appropriability is above the median level. Furthermore, for supported firms with the highest degree of appropriability, crowding out cannot be ruled out.},
  number = {4},
  journal = {The Journal of Industrial Economics},
  author = {Gelabert, Liliana and Fosfuri, Andrea and Tribó, Josep A.},
  month = dec,
  year = {2009},
  pages = {736--767}
},

@article{smithson_better_2006,
  title = {A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables},
  volume = {11},
  issn = {1082-989X},
  shorttitle = {A better lemon squeezer?},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16594767},
  doi = {10.1037/1082-989X.11.1.54},
  abstract = {Uncorrectable skew and heteroscedasticity are among the "lemons" of psychological data, yet many important variables naturally exhibit these properties. For scales with a lower and upper bound, a suitable candidate for models is the beta distribution, which is very flexible and models skew quite well. 
The authors present maximum-likelihood regression models assuming that the dependent variable is conditionally beta distributed rather than Gaussian. The approach models both means (location) and variances (dispersion) with their own distinct sets of predictors (continuous and/or categorical), thereby modeling heteroscedasticity. The location sub-model link function is the logit and thereby analogous to logistic regression, whereas the dispersion sub-model is log linear. Real examples show that these models handle the independent observations case readily. The article discusses comparisons between beta regression and alternative techniques, model selection and interpretation, practical estimation, and software.},
  number = {1},
  journal = {Psychological Methods},
  author = {Smithson, Michael and Verkuilen, Jay},
  month = mar,
  year = {2006},
  keywords = {Analysis of Variance, Bias {(Epidemiology)}, Child, Data Interpretation, Statistical, Dyslexia, Humans, Least-Squares Analysis, Likelihood Functions, Linear Models, Models, Statistical, Normal Distribution, Regression analysis, Reproducibility of Results},
  pages = {54--71}
},

@article{buzkova_permutation_2011,
  title = {Permutation and parametric bootstrap tests for gene-gene and gene-environment interactions},
  volume = {75},
  issn = {1469-1809},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/20384625},
  doi = {10.1111/j.1469-1809.2010.00572.x},
  abstract = {Permutation tests are widely used in genomic research as a straightforward way to obtain reliable statistical inference without making strong distributional assumptions. However, in this paper we show that in genetic association studies it is not typically possible to construct exact permutation tests of gene-gene or gene-environment interaction hypotheses. We describe an alternative to the permutation approach in testing for interaction, a parametric bootstrap approach. 
Using simulations, we compare the finite-sample properties of a few often-used permutation tests and the parametric bootstrap. We consider interactions of an exposure with single and multiple polymorphisms. Finally, we address when permutation tests of interaction will be approximately valid in large samples for specific test statistics.},
  number = {1},
  journal = {Annals of Human Genetics},
  author = {Bůžková, Petra and Lumley, Thomas and Rice, Kenneth},
  month = jan,
  year = {2011},
  keywords = {Animals, Computer Simulation, Environment, Epistasis, Genetic, Genetic Association Studies, Humans, Models, Genetic, Models, Statistical, Polymorphism, Genetic},
  pages = {36--45}
},

@article{balch_hierarchic_2000,
  title = {Hierarchic Social Entropy: An Information Theoretic Measure of Robot Group Diversity},
  volume = {8},
  issn = {0929-5593},
  shorttitle = {Hierarchic Social Entropy},
  url = {http://dx.doi.org/10.1023/A:1008973424594},
  doi = {10.1023/A:1008973424594},
  number = {3},
  journal = {Autonomous Robots},
  author = {Balch, Tucker},
  month = jun,
  year = {2000},
  keywords = {behavioral diversity, heterogeneity, multi-robot systems},
  pages = {209--238}
},

@article{preacher_computational_2006,
  title = {Computational Tools for Probing Interactions in Multiple Linear Regression, Multilevel Modeling, and Latent Curve Analysis},
  volume = {31},
  issn = {1076-9986},
  url = {http://www.jstor.org/stable/4122453},
  abstract = {Simple slopes, regions of significance, and confidence bands are commonly used to evaluate interactions in multiple linear regression {(MLR)} models, and the use of these techniques has recently been extended to multilevel or hierarchical linear modeling {(HLM)} and latent curve analysis {(LCA).} However, conducting these tests and plotting the conditional relations is often a tedious and error-prone task. 
This article provides an overview of methods used to probe interaction effects and describes a unified collection of freely available online resources that researchers can use to obtain significance tests for simple slopes, compute regions of significance, and obtain confidence bands for simple slopes across the range of the moderator in the {MLR}, {HLM}, and {LCA} contexts. Plotting capabilities are also provided.},
  number = {4},
  journal = {Journal of Educational and Behavioral Statistics},
  author = {Preacher, Kristopher J. and Curran, Patrick J. and Bauer, Daniel J.},
  month = dec,
  year = {2006},
  pages = {437--448},
  file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/U4US525I/Preacher et al. - 2006 - Computational Tools for Probing Interactions in Mu.pdf:application/pdf}
},

@book{shannon_mathematical_1949,
  address = {Urbana},
  title = {The Mathematical Theory of Communication},
  publisher = {University of Illinois Press},
  author = {Shannon, Claude Elwood and Weaver, Warren},
  year = {1949},
  keywords = {Mathematical physics, Telecommunication}
},

@book{aiken_multiple_1991,
  title = {Multiple Regression: Testing and Interpreting Interactions},
  isbn = {0761907122},
  shorttitle = {Multiple Regression},
  publisher = {Sage Publications, Inc},
  author = {Aiken, Leona S. and West, Stephen G.},
  month = jan,
  year = {1991}
},

@article{liao_estimated_2000,
  title = {Estimated Precision for Predictions from Generalized Linear Models in Sociological Research},
  volume = {34},
  issn = {0033-5177},
  url = {http://www.springerlink.com/content/p71102h877436h26/abstract/},
  doi = {10.1023/A:1004798429785},
  abstract = {In this paper I present a general method for constructing confidence intervals for predictions from the generalized linear model in sociological research. I demonstrate that the method used for constructing confidence intervals for predictions in classical linear models is indeed a special case of the method for generalized linear models. 
I examine four such models – the binary logit, the binary probit, the ordinal logit, and the Poisson regression model – to construct confidence intervals for predicted values in the form of probability, odds, Z score, or event count. The estimated confidence interval for an event prediction, when applied judiciously, can give the researcher useful information and an estimated measure of precision for the prediction so that interpretation of estimates from the generalized linear model becomes easier.},
  number = {2},
  journal = {Quality \& Quantity},
  author = {Liao, Tim Futing},
  year = {2000},
  keywords = {Humanities, Social Sciences and Law},
  pages = {137--152},
  file = {SpringerLink Snapshot:/home/pauljohn/Documents/Zotero/storage/68NCKMBV/Liao - 2000 - Estimated Precision for Predictions from Generaliz.html:text/html}
},

@article{lin_comparison_2010,
  title = {A comparison of multiple imputation with {EM} algorithm and {MCMC} method for quality of life missing data},
  volume = {44},
  issn = {0033-5177},
  url = {http://www.springerlink.com/content/mh5353g766075586/abstract/},
  doi = {10.1007/s11135-008-9196-5},
  abstract = {This study investigated the performance of multiple imputations with Expectation-Maximization {(EM)} algorithm and Monte Carlo Markov chain {(MCMC)} method in missing data imputation. We compared the accuracy of imputation based on some real data and set up two extreme scenarios and conducted both empirical and simulation studies to examine the effects of missing data rates and number of items used for imputation. In the empirical study, the scenario represented item of highest missing rate from a domain with fewest items. In the simulation study, we selected a domain with most items and the item imputed has lowest missing rate. In the empirical study, the results showed there was no significant difference between {EM} algorithm and {MCMC} method for item imputation, and number of items used for imputation has little impact, either. 
Compared with the actual observed values, the middle responses of 3 and 4 were over-imputed, and the extreme responses of 1, 2 and 5 were under-represented. The similar patterns occurred for domain imputation, and no significant difference between {EM} algorithm and {MCMC} method and number of items used for imputation has little impact. In the simulation study, we chose environmental domain to examine the effect of the following variables: {EM} algorithm and {MCMC} method, missing data rates, and number of items used for imputation. Again, there was no significant difference between {EM} algorithm and {MCMC} method. The accuracy rates did not significantly reduce with increase in the proportions of missing data. Number of items used for imputation has some contribution to accuracy of imputation, but not as much as expected.},
  number = {2},
  journal = {Quality \& Quantity},
  author = {Lin, Ting},
  year = {2010},
  keywords = {Humanities, Social Sciences and Law},
  pages = {277--287},
  file = {SpringerLink Snapshot:/home/pauljohn/Documents/Zotero/storage/THZQT2PQ/Lin - 2010 - A comparison of multiple imputation with EM algori.html:text/html}
},

@article{zheng_summarizing_2000,
  title = {Summarizing the predictive power of a generalized linear model},
  volume = {19},
  issn = {0277-6715},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/10861777},
  abstract = {This paper studies summary measures of the predictive power of a generalized linear model, paying special attention to a generalization of the multiple correlation coefficient from ordinary linear regression. The population value is the correlation between the response and its conditional expectation given the predictors, and the sample value is the correlation between the observed response and the model predicted value. We compare four estimators of the measure in terms of bias, mean squared error and behaviour in the presence of overparameterization. 
The sample estimator and a jack-knife estimator usually behave adequately, but a cross-validation estimator has a large negative bias with large mean squared error. One can use bootstrap methods to construct confidence intervals for the population value of the correlation measure and to estimate the degree to which a model selection procedure may provide an overly optimistic measure of the actual predictive power.},
  number = {13},
  journal = {Statistics in Medicine},
  author = {Zheng, B. and Agresti, A.},
  month = jul,
  year = {2000},
  note = {{PMID:} 10861777},
  keywords = {Bias {(Epidemiology)}, Confidence Intervals, Data Interpretation, Statistical, Humans, Infant, Low Birth Weight, Infant, Newborn, Likelihood Functions, Linear Models, Logistic Models, Predictive Value of Tests},
  pages = {1771--1781}
},

@article{xu_improved_2006,
  title = {Improved confidence regions for a mean vector under general conditions},
  volume = {51},
  issn = {0167-9473},
  url = {http://dx.doi.org/10.1016/j.csda.2005.10.011},
  doi = {10.1016/j.csda.2005.10.011},
  number = {2},
  journal = {Computational Statistics \& Data Analysis},
  author = {Xu, Jin and Gupta, Arjun K.},
  month = nov,
  year = {2006},
  keywords = {{62E20}, Confidence region, Coverage probability, Edgeworth expansion, Normalization transformation, primary {62G15}, secondary {62H10}},
  pages = {1051--1062}
},

@article{sun_confidence_2000,
  title = {Confidence bands in generalized linear models},
  volume = {28},
  issn = {0090-5364},
  url = {http://projecteuclid.org/euclid.aos/1016218225},
  doi = {10.1214/aos/1016218225},
  abstract = {Generalized linear models {(GLM)} include many useful models. This paper studies simultaneous confidence regions for the mean response function in these models. The coverage probabilities of these regions are related to tail probabilities of maxima of Gaussian random fields, asymptotically, and hence, the so-called tube formula is applicable without any modification. 
However, in the generalized linear models, the errors are often nonadditive and non-Gaussian and may be discrete. This poses a challenge to the accuracy of the approximation by the tube formula in the moderate sample situation. Here two alternative approaches are considered. These approaches are based on an Edgeworth expansion for the distribution of a maximum likelihood estimator and a version of Skorohod’s representation theorem, which are used to convert an error term (which is of order $n^{-1/2}$ in one-sided confidence regions and of $n^{-1}$ in two-sided confidence regions) from the Edgeworth expansion to a “bias” term. The bias is then estimated and corrected in two ways to adjust the approximation formula. Examples and simulations show that our methods are viable and complementary to existing methods. An application to insect data is provided. Code for implementing our procedures is available via the software parfit},
  language = {{EN}},
  number = {2},
  journal = {The Annals of Statistics},
  author = {Sun, Jiayang and Loader, Catherine and McCormick, William P.},
  month = apr,
  year = {2000},
  pages = {429--460}
},

@article{withers_improved_????,
  title = {Improved confidence regions based on Edgeworth expansions},
  issn = {0167-9473},
  url = {http://www.sciencedirect.com/science/article/pii/S0167947312001454},
  doi = {10.1016/j.csda.2012.03.019},
  abstract = {Let $\hat{w}$ be a consistent estimate of $w$ in $R^p$ satisfying the standard cumulant expansion in powers of $n^{-1}$ with asymptotic covariance $n^{-1} V$. Then $n^{1/2} (\hat{w} - w)$ has the standard Edgeworth expansion about $N_p(0, V)$. We obtain from this the Edgeworth expansions for $T_n(V) = n (\hat{w} - w)' V^{-1} (\hat{w} - w)$ about $\chi_p^2$ and for its Studentized version, $T_n(\hat{V})$. So, we obtain a confidence region for $w$ of level $\alpha + O(n^{-2})$.},
  journal = {Computational Statistics \& Data Analysis},
  year = {2012},
  author = {Withers, Christopher S. 
and Nadarajah, Saralees}, keywords = {Cumulants, Edgeworth expansions, Ellipsoidal confidence regions}, file = {ScienceDirect Full Text PDF:/home/pauljohn/Documents/Zotero/storage/B9MUEMX9/Withers and Nadarajah - Improved confidence regions based on Edgeworth exp.pdf:application/pdf;ScienceDirect Snapshot:/home/pauljohn/Documents/Zotero/storage/GS5ZMQGA/S0167947312001454.html:text/html} }, @article{ni_automatic_2009, title = {Automatic Model Selection for Partially Linear Models}, volume = {100}, issn = {0047-{259X}}, doi = {10.1016/j.jmva.2009.06.009}, abstract = {We propose and study a unified procedure for variable selection in partially linear models. A new type of double-penalized least squares is formulated, using the smoothing spline to estimate the nonparametric part and applying a shrinkage penalty on parametric components to achieve model parsimony. Theoretically we show that, with proper choices of the smoothing and regularization parameters, the proposed procedure can be as efficient as the oracle estimator (). We also study the asymptotic properties of the estimator when the number of parametric effects diverges with the sample size. Frequentist and Bayesian estimates of the covariance and confidence intervals are derived for the estimators. One great advantage of this procedure is its linear mixed model {(LMM)} representation, which greatly facilitates its implementation by using standard statistical software. Furthermore, the {LMM} framework enables one to treat the smoothing parameter as a variance component and hence conveniently estimate it together with other regression coefficients. 
Extensive numerical studies are conducted to demonstrate the effective performance of the proposed procedure.}, number = {9}, journal = {Journal of Multivariate Analysis}, author = {Ni, Xiao and Zhang, Hao Helen and Zhang, Daowen}, month = oct, year = {2009}, pages = {2100--2111}, file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/R6ZCI3GB/Ni et al. - 2009 - Automatic Model Selection for Partially Linear Mod.pdf:application/pdf} }, @article{ma_variable_2010, title = {Variable Selection in Measurement Error Models}, volume = {16}, issn = {1350-7265}, abstract = {Measurement error data or errors-in-variable data are often collected in many studies. Natural criterion functions are often unavailable for general functional measurement error models due to the lack of information on the distribution of the unobservable covariates. Typically, the parameter estimation is via solving estimating equations. In addition, the construction of such estimating equations routinely requires solving integral equations, hence the computation is often much more intensive compared with ordinary regression models. Because of these difficulties, traditional best subset variable selection procedures are not applicable, and in the measurement error model context, variable selection remains an unsolved issue. In this paper, we develop a framework for variable selection in measurement error models via penalized estimating equations. We first propose a class of selection procedures for general parametric measurement error models and for general semiparametric measurement error models, and study the asymptotic properties of the proposed procedures. Then, under certain regularity conditions and with a properly chosen regularization parameter, we demonstrate that the proposed procedure performs as well as an oracle procedure. 
We assess the finite sample performance via Monte Carlo simulation studies and illustrate the proposed methodology through the empirical analysis of a familiar data set.}, number = {1}, journal = {Bernoulli}, author = {Ma, Yanyuan and Li, Runze}, year = {2010}, pages = {274--300}, file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XDSJSDT2/Ma and Li - 2010 - Variable Selection in Measurement Error Models.pdf:application/pdf} }, @article{garcia_variable_2010, title = {Variable Selection for Regression Models with Missing Data}, volume = {20}, issn = {1017-0405}, abstract = {We consider the variable selection problem for a class of statistical models with missing data, including missing covariate and/or response data. We investigate the smoothly clipped absolute deviation penalty {(SCAD)} and adaptive {LASSO} and propose a unified model selection and estimation procedure for use in the presence of missing data. We develop a computationally attractive algorithm for simultaneously optimizing the penalized likelihood function and estimating the penalty parameters. Particularly, we propose to use a model selection criterion, called the {ICQ} statistic, for selecting the penalty parameters. We show that the variable selection procedure based on {ICQ} automatically and consistently selects the important covariates and leads to efficient estimates with oracle properties. The methodology is very general and can be applied to numerous situations involving missing data, from covariates missing at random in arbitrary regression models to nonignorably missing longitudinal responses and/or covariates. Simulations are given to demonstrate the methodology and examine the finite sample performance of the variable selection procedures. 
Melanoma data from a cancer clinical trial is presented to illustrate the proposed methodology.}, number = {1}, journal = {Statistica Sinica}, author = {Garcia, Ramon I. and Ibrahim, Joseph G. and Zhu, Hongtu}, month = jan, year = {2010}, pages = {149--165}, file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HH2BBF3T/Garcia et al. - 2010 - VARIABLE SELECTION FOR REGRESSION MODELS WITH MISS.pdf:application/pdf} }, @article{huang_bayesian_2005, title = {Bayesian analysis for generalized linear models with nonignorably missing covariates}, volume = {61}, issn = {0006-{341X}}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16135028}, doi = {10.1111/j.1541-0420.2005.00338.x}, abstract = {We propose Bayesian methods for estimating parameters in generalized linear models {(GLMs)} with nonignorably missing covariate data. We show that when improper uniform priors are used for the regression coefficients, phi, of the multinomial selection model for the missing data mechanism, the resulting joint posterior will always be improper if (i) all missing covariates are discrete and an intercept is included in the selection model for the missing data mechanism, or (ii) at least one of the covariates is continuous and unbounded. This impropriety will result regardless of whether proper or improper priors are specified for the regression parameters, beta, of the {GLM} or the parameters, alpha, of the covariate distribution. To overcome this problem, we propose a novel class of proper priors for the regression coefficients, phi, in the selection model for the missing data mechanism. These priors are robust and computationally attractive in the sense that inferences about beta are not sensitive to the choice of the hyperparameters of the prior for phi and they facilitate a Gibbs sampling scheme that leads to accelerated convergence. 
In addition, we extend the model assessment criterion of Chen, Dey, and Ibrahim (2004a, Biometrika 91, 45-63), called the weighted L measure, to {GLMs} and missing data problems as well as extend the deviance information criterion {(DIC)} of Spiegelhalter et al. (2002, Journal of the Royal Statistical Society B 64, 583-639) for assessing whether the missing data mechanism is ignorable or nonignorable. A novel Markov chain Monte Carlo sampling algorithm is also developed for carrying out posterior computation. Several simulations are given to investigate the performance of the proposed Bayesian criteria as well as the sensitivity of the prior specification. Real datasets from a melanoma cancer clinical trial and a liver cancer study are presented to further illustrate the proposed methods.}, number = {3}, journal = {Biometrics}, author = {Huang, Lan and Chen, Ming-Hui and Ibrahim, Joseph G}, month = sep, year = {2005}, note = {{PMID:} 16135028}, keywords = {Bayes Theorem, Clinical Trials, Phase {III} as Topic, Computer Simulation, Data Interpretation, Statistical, Humans, Interferons, Linear Models, Liver Neoplasms, Male, Markov Chains, Melanoma, Middle Aged, {MONTE} Carlo method, Regression analysis}, pages = {767--780} }, @article{gibbons_mixed-effects_2008, title = {Mixed-effects Poisson regression analysis of adverse event reports}, volume = {27}, issn = {0277-6715}, doi = {10.1002/sim.3241}, abstract = {A new statistical methodology is developed for the analysis of spontaneous adverse event {(AE)} reports from post-marketing drug surveillance data. The method involves both empirical Bayes {(EB)} and fully Bayes estimation of rate multipliers for each drug within a class of drugs, for a particular {AE}, based on a mixed-effects Poisson regression model. Both parametric and semiparametric models for the random-effect distribution are examined. 
The method is applied to data from Food and Drug Administration {(FDA)’s} Adverse Event Reporting System {(AERS)} on the relationship between antidepressants and suicide. We obtain point estimates and 95 per cent confidence (posterior) intervals for the rate multiplier for each drug (e.g. antidepressants), which can be used to determine whether a particular drug has an increased risk of association with a particular {AE} (e.g. suicide). Confidence (posterior) intervals that do not include 1.0 provide evidence for either significant protective or harmful associations of the drug and the adverse effect. We also examine {EB}, parametric Bayes, and semiparametric Bayes estimators of the rate multipliers and associated confidence (posterior) intervals. Results of our analysis of the {FDA} {AERS} data revealed that newer antidepressants are associated with lower rates of suicide adverse event reports compared with older antidepressants. We recommend improvements to the existing {AERS} system, which are likely to improve its public health value as an early warning system.}, number = {11}, journal = {Statistics in Medicine}, author = {Gibbons, Robert D. and Segawa, Eisuke and Karabatsos, George and Amatya, Anup K. and Bhaumik, Dulal K. and Brown, C. Hendricks and Kapur, Kush and Marcus, Sue M. and Hur, Kwan and Mann, J. John}, month = may, year = {2008}, pages = {1814--1833}, file = {PubMed Central Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2RPEXKQ9/Gibbons et al. 
- 2008 - Mixed-effects Poisson regression analysis of adver.pdf:application/pdf} }, @article{gibbons_random_1997, title = {Random Effects Probit and Logistic Regression Models for Three-Level Data}, volume = {53}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/2533520}, doi = {10.2307/2533520}, abstract = {In analysis of binary data from clustered and longitudinal studies, random effect models have been recently developed to accommodate two-level problems such as subjects nested within clusters or repeated classifications within subjects. Unfortunately, these models cannot be applied to three-level problems that occur frequently in practice. For example, multicenter longitudinal clinical trials involve repeated assessments within individuals and individuals are nested within study centers. This combination of clustered and longitudinal data represents the classic three-level problem in biometry. Similarly, in prevention studies, various educational programs designed to minimize risk taking behavior (e.g., smoking prevention and cessation) may be compared where randomization to various design conditions is at the level of the school and the intervention is performed at the level of the classroom. Previous statistical approaches to the three-level problem for binary response data have either ignored one level of nesting, treated it as a fixed effect, or used first- and second-order Taylor series expansions of the logarithm of the conditional likelihood to linearize these models and estimate model parameters using more conventional procedures for measurement data. Recent studies indicate that these approximate solutions exhibit considerable bias and provide little advantage over use of traditional logistic regression analysis ignoring the hierarchical structure. In this paper, we generalize earlier results for two-level random effects probit and logistic regression models to the three-level case. 
Parameter estimation is based on full-information maximum marginal likelihood estimation {(MMLE)} using numerical quadrature to approximate the multiple random effects. The model is illustrated using data from 135 classrooms from 28 schools on the effects of two smoking cessation interventions.}, number = {4}, journal = {Biometrics}, author = {Gibbons, Robert D. and Hedeker, Donald}, month = dec, year = {1997}, pages = {1527--1537}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UAHXIUJD/Gibbons and Hedeker - 1997 - Random Effects Probit and Logistic Regression Mode.pdf:application/pdf} }, @article{rossi_nonparametric_2002, title = {Nonparametric Item Response Function Estimates with the {EM} Algorithm}, volume = {27}, issn = {1076-9986}, url = {http://www.jstor.org/stable/3648139}, abstract = {The methods of functional data analysis are used to estimate item response functions {(IRFs)} nonparametrically. The {EM} algorithm is used to maximize the penalized marginal likelihood of the data. The penalty controls the smoothness of the estimated {IRFs}, and is chosen so that, as the penalty is increased, the estimates converge to shapes closely represented by the three-parameter logistic family. The one-dimensional latent trait model is recast as a problem of estimating a space curve or manifold, and, expressed in this way, the model no longer involves any latent constructs, and is invariant with respect to choice of latent variable. Some results from differential geometry are used to develop a data-anchored measure of ability and a new technique for assessing item discriminability. 
Functional data-analytic techniques are used to explore the functional variation in the estimated {IRFs.} Applications involving simulated and actual data are included.}, number = {3}, journal = {Journal of Educational and Behavioral Statistics}, author = {Rossi, Natasha and Wang, Xiaohui and Ramsay, James O.}, month = oct, year = {2002}, pages = {291--317}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/6IUCZZXG/Rossi et al. - 2002 - Nonparametric Item Response Function Estimates wit.pdf:application/pdf} }, @article{patz_straightforward_1999, title = {A Straightforward Approach to Markov Chain Monte Carlo Methods for Item Response Models}, volume = {24}, issn = {1076-9986}, url = {http://www.jstor.org/stable/1165199}, doi = {10.2307/1165199}, abstract = {This paper demonstrates Markov chain Monte Carlo {(MCMC)} techniques that are particularly well-suited to complex models with item response theory {(IRT)} assumptions. {MCMC} may be thought of as a successor to the standard practice of first calibrating the items using E-M methods and then taking the item parameters to be known and fixed at their calibrated values when proceeding with inference regarding the latent trait. In contrast to this two-stage E-M approach, {MCMC} methods treat item and subject parameters at the same time; this allows us to incorporate standard errors of item estimates into trait inferences, and vice versa. We develop a {MCMC} methodology, based on Metropolis-Hastings sampling, that can be routinely implemented to fit novel {IRT} models, and we compare the algorithmic features of the Metropolis-Hastings approach to other approaches based on Gibbs sampling. For concreteness we illustrate the methodology using the familiar two-parameter logistic {(2PL)} {IRT} model; more complex models are treated in a subsequent paper {(Patz} \& Junker, in press).}, number = {2}, journal = {Journal of Educational and Behavioral Statistics}, author = {Patz, Richard J. 
and Junker, Brian W.}, month = jul, year = {1999}, pages = {146--178}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2G68936D/Patz and Junker - 1999 - A Straightforward Approach to Markov Chain Monte C.pdf:application/pdf} }, @article{aitkin_general_1999, title = {A General Maximum Likelihood Analysis of Variance Components in Generalized Linear Models}, volume = {55}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/2533902}, abstract = {This paper describes an {EM} algorithm for nonparametric maximum likelihood {(ML)} estimation in generalized linear models with variance component structure. The algorithm provides an alternative analysis to approximate {MQL} and {PQL} analyses {(McGilchrist} and Aisbett, 1991, Biometrical Journal 33, 131-141; Breslow and Clayton, 1993; Journal of the American Statistical Association 88, 9-25; {McGilchrist}, 1994, Journal of the Royal Statistical Society, Series B 56, 61-69; Goldstein, 1995, Multilevel Statistical Models) and to {GEE} analyses {(Liang} and Zeger, 1986, Biometrika 73, 13-22). The algorithm, first given by Hinde and Wood (1987, in Longitudinal Data Analysis, 110-126), is a generalization of that for random effect models for overdispersion in generalized linear models, described in Aitkin (1996, Statistics and Computing 6, 251-262). The algorithm is initially derived as a form of Gaussian quadrature assuming a normal mixing distribution, but with only slight variation it can be used for a completely unknown mixing distribution, giving a straightforward method for the fully nonparametric {ML} estimation of this distribution. This is of value because the {ML} estimates of the {GLM} parameters can be sensitive to the specification of a parametric form for the mixing distribution. The nonparametric analysis can be extended straightforwardly to general random parameter models, with full {NPML} estimation of the joint distribution of the random parameters. 
This can produce substantial computational saving compared with full numerical integration over a specified parametric distribution for the random parameters. A simple method is described for obtaining correct standard errors for parameter estimates when using the {EM} algorithm. Several examples are discussed involving simple variance component and longitudinal models, and small-area estimation.}, number = {1}, journal = {Biometrics}, author = {Aitkin, Murray}, month = mar, year = {1999}, pages = {117--128}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RI2T7V6M/Aitkin - 1999 - A General Maximum Likelihood Analysis of Variance .pdf:application/pdf} }, @article{harwell_item_1988, title = {Item Parameter Estimation Via Marginal Maximum Likelihood and an {EM} Algorithm: A Didactic}, volume = {13}, issn = {0362-9791}, shorttitle = {Item Parameter Estimation Via Marginal Maximum Likelihood and an {EM} Algorithm}, url = {http://www.jstor.org/stable/1164654}, doi = {10.2307/1164654}, abstract = {The Bock and Aitkin (1981) Marginal Maximum {Likelihood/EM} approach to item parameter estimation is an alternative to the classical joint maximum likelihood procedure of item response theory. Unfortunately, the complexity of the underlying mathematics and the terse nature of the existing literature has made understanding of the approach difficult. To make the approach accessible to a wider audience, the present didactic paper provides the essential mathematical details of a marginal maximum {likelihood/EM} solution and shows how it can be used to obtain consistent item parameter estimates. For pedagogical purposes, a short {BASIC} computer program is used to illustrate the underlying simplicity of the method.}, number = {3}, journal = {Journal of Educational Statistics}, author = {Harwell, Michael R. and Baker, Frank B. 
and Zwarts, Michael}, month = oct, year = {1988}, pages = {243--271}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/C4HBTT43/Harwell et al. - 1988 - Item Parameter Estimation Via Marginal Maximum Lik.pdf:application/pdf} }, @article{rigdon_estimation_1987, title = {Estimation for the Rasch Model When Both Ability and Difficulty Parameters Are Random}, volume = {12}, issn = {0362-9791}, url = {http://www.jstor.org/stable/1164629}, doi = {10.2307/1164629}, abstract = {Estimation of the parameters of the Rasch model, a one-parameter item response model, is considered when both the item parameters and the ability parameters are considered random quantities. It is assumed that the item parameters are drawn from a N (γ, τ $^{\textrm{2}}$) distribution, and the abilities are drawn from a N(0, σ $^{\textrm{2}}$) distribution. A variation of the {EM} algorithm is used to find approximate maximum likelihood estimates of γ, τ, and σ. A second approach assumes that the difficulty parameters are drawn from a uniform distribution over part of the real line. Real and simulated data sets are discussed for illustration.}, number = {1}, journal = {Journal of Educational Statistics}, author = {Rigdon, Steven E. and Tsutakawa, Robert K.}, month = apr, year = {1987}, pages = {76--86}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HU52Z322/Rigdon and Tsutakawa - 1987 - Estimation for the Rasch Model When Both Ability a.pdf:application/pdf} }, @article{kamata_item_2001, title = {Item Analysis by the Hierarchical Generalized Linear Model}, volume = {38}, issn = {0022-0655}, url = {http://www.jstor.org/stable/1435439}, abstract = {The hierarchical generalized linear model {(HGLM)} is presented as an explicit, two-level formulation of a multilevel item response model. 
In this paper, it is shown that the {HGLM} is equivalent to the Rasch model and that, characteristic of the {HGLM}, person ability can be expressed in the form of random effects rather than parameters. The two-level item analysis model is presented as a latent regression model with person-characteristic variables. Furthermore, it is shown that the two-level {HGLM} model can be extended to a three-level latent regression model that permits investigation of the variation of students' performance across groups, such as is found in classrooms and schools, and of the interactive effect of person-and group-characteristic variables.}, number = {1}, journal = {Journal of Educational Measurement}, author = {Kamata, Akihito}, month = apr, year = {2001}, pages = {79--93}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UZWCX4QT/Kamata - 2001 - Item Analysis by the Hierarchical Generalized Line.pdf:application/pdf} }, @article{liu_mixed-effects_2006, title = {A Mixed-Effects Regression Model for Longitudinal Multivariate Ordinal Data}, volume = {62}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/3695729}, abstract = {A mixed-effects item response theory model that allows for three-level multivariate ordinal outcomes and accommodates multiple random subject effects is proposed for analysis of multivariate ordinal outcomes in longitudinal studies. This model allows for the estimation of different item factor loadings (item discrimination parameters) for the multiple outcomes. The covariates in the model do not have to follow the proportional odds assumption and can be at any level. Assuming either a probit or logistic response function, maximum marginal likelihood estimation is proposed utilizing multidimensional Gauss-Hermite quadrature for integration of the random effects. An iterative Fisher scoring solution, which provides standard errors for all model parameters, is used. 
An analysis of a longitudinal substance use data set, where four items of substance use behavior (cigarette use, alcohol use, marijuana use, and getting drunk or high) are repeatedly measured over time, is used to illustrate application of the proposed model.}, number = {1}, journal = {Biometrics}, author = {Liu, Li C. and Hedeker, Donald}, month = mar, year = {2006}, pages = {261--268}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/MV7BRTTN/Liu and Hedeker - 2006 - A Mixed-Effects Regression Model for Longitudinal .pdf:application/pdf} }, @article{hedeker_random-effects_1994, title = {A Random-Effects Ordinal Regression Model for Multilevel Analysis}, volume = {50}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/2533433}, doi = {10.2307/2533433}, abstract = {A random-effects ordinal regression model is proposed for analysis of clustered or longitudinal ordinal response data. This model is developed for both the probit and logistic response functions. The threshold concept is used, in which it is assumed that the observed ordered category is determined by the value of a latent unobservable continuous response that follows a linear regression model incorporating random effects. A maximum marginal likelihood {(MML)} solution is described using Gauss-Hermite quadrature to numerically integrate over the distribution of random effects. 
An analysis of a dataset where students are clustered or nested within classrooms is used to illustrate features of random-effects analysis of clustered ordinal data, while an analysis of a longitudinal dataset where psychiatric patients are repeatedly rated as to their severity is used to illustrate features of the random-effects approach for longitudinal ordinal data.}, number = {4}, journal = {Biometrics}, author = {Hedeker, Donald and Gibbons, Robert D.}, month = dec, year = {1994}, pages = {933--944}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/IT7S3TAC/Hedeker and Gibbons - 1994 - A Random-Effects Ordinal Regression Model for Mult.pdf:application/pdf} }, @article{im_mixed_1988, title = {Mixed Models for Binomial Data with an Application to Lamb Mortality}, volume = {37}, issn = {0035-9254}, url = {http://www.jstor.org/stable/2347339}, doi = {10.2307/2347339}, abstract = {The simplex method, a derivative-free function maximisation algorithm, is used as an alternative to the {EM} algorithm in computing maximum likelihood estimates in mixed probit and logit models with binomial data. The models are used to estimate heritability and to predict sire effects when analysing a lamb mortality data set.}, number = {2}, journal = {Journal of the Royal Statistical Society. Series C {(Applied} Statistics)}, author = {Im, S. 
and Gianola, D.}, month = jan, year = {1988}, pages = {196--204}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/K4RWRRBN/Im and Gianola - 1988 - Mixed Models for Binomial Data with an Application.pdf:application/pdf} }, @article{gibbons_random-effects_1994, title = {A Random-Effects Probit Model for Predicting Medical Malpractice Claims}, volume = {89}, issn = {0162-1459}, url = {http://www.jstor.org/stable/2290901}, doi = {10.2307/2290901}, abstract = {We use Oregon state data (1981-1990) on medical malpractice claims to develop a random-effects probit model for vulnerability to a medical malpractice claim in practice year k(k = 1, 2, ..., n$_{\textrm{i}}$) for physician i(i = 1, 2, ..., N physicians in the sample) conditional on an n$_{\textrm{i}}$ × p covariate matrix W$_{\textrm{i}}$ that contains a mixture of p time-varying and time-invariant covariates. In this application, time-invariant covariates were physician sex and specialty (surgical versus nonsurgical). Time-varying covariates were age, the cumulative amount of risk management education (i.e., number of courses) taken by physician i to year k, and prior claim history. In addition, the model incorporates a random effect of "claim vulnerability" assumed to be normally distributed in the population of physicians. This random effect represents unobservable and/or unmeasured characteristics that place one physician at greater risk for experiencing a medical malpractice claim than another physician. In addition, we also determine if the effects of risk management training on claim vulnerability differ before and after the physician's first malpractice claim. 
Results of the analysis reveal that (1) there is a sizable random physician effect; (2) risk increases between age 40 to 60; (3) physicians in a surgical specialty are at increased risk; (4) male physicians are at greater risk than female physicians; (5) risk increases following an initial claim, particularly in the year subsequent to the initial claim, and (6) some beneficial effects of risk management education are observed in physicians with a prior claim history, particularly those in anesthesiology and obstetrics and gynecology.}, number = {427}, journal = {Journal of the American Statistical Association}, author = {Gibbons, Robert D. and Hedeker, Donald and Charles, Sara C. and Frisch, Paul}, month = sep, year = {1994}, note = {{ArticleType:} research-article / Full publication date: Sep., 1994 / Copyright © 1994 American Statistical Association}, pages = {760--767}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WZQ82FRX/Gibbons et al. - 1994 - A Random-Effects Probit Model for Predicting Medic.pdf:application/pdf} }, @article{gibbons_applications_2000, title = {Applications of Mixed-Effects Models in Biostatistics}, volume = {62}, issn = {0581-5738}, url = {http://www.jstor.org/stable/25053120}, abstract = {We present recent developments in mixed-effects models relevant to application in biostatistics. The major focus is on application of mixed-effects models to analysis of longitudinal data in general and longitudinal controlled clinical trials in detail. We present application of mixed-effects models to the case of unbalanced longitudinal data with complex residual error structures for continuous, binary and ordinal outcome measures for data with two and three levels of nesting (e.g., a multi-center longitudinal clinical trial). 
We also examine other applications of mixed-effects models in the biological and behavioral sciences, such as analysis of clustered data, and simultaneous assessment of multiple biologic endpoints (e.g., multivariate probit analysis). We describe the general statistical theory and then present relevant examples of these models to problems in the biological sciences.}, number = {1}, journal = {Sankhyā: The Indian Journal of Statistics, Series B (1960-2002)}, author = {Gibbons, Robert D. and Hedeker, Donald}, month = apr, year = {2000}, pages = {70--103}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WVHF4QR2/Gibbons and Hedeker - 2000 - Applications of Mixed-Effects Models in Biostatist.pdf:application/pdf} }, @article{goldstein_improved_1996, title = {Improved Approximations for Multilevel Models with Binary Responses}, volume = {159}, issn = {0964-1998}, url = {http://www.jstor.org/stable/2983328}, doi = {10.2307/2983328}, abstract = {This paper discusses the use of improved approximations for the estimation of generalized linear multilevel models where the response is a proportion. Simulation studies by Rodriguez and Goldman have shown that in extreme situations large biases can occur, most notably when the response is binary, the number of level 1 units per level 2 unit is small and the underlying random parameter values are large. An improved approximation is introduced which largely eliminates the biases in the situation described by Rodriguez and Goldman.}, number = {3}, journal = {Journal of the Royal Statistical Society. 
Series A {(Statistics} in Society)}, author = {Goldstein, Harvey and Rasbash, Jon}, month = jan, year = {1996}, pages = {505--513}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JGN94I3I/Goldstein and Jon Rasbash - 1996 - Improved Approximations for Multilevel Models with.pdf:application/pdf} }, @article{agresti_random-effects_2000, title = {Random-Effects Modeling of Categorical Response Data}, volume = {30}, issn = {0081-1750}, url = {http://www.jstor.org/stable/271130}, abstract = {In many applications observations have some type of clustering, with observations within clusters tending to be correlated. A common instance of this occurs when each subject in the sample undergoes repeated measurement, in which case a cluster consists of the set of observations for the subject. One approach to modeling clustered data introduces cluster-level random effects into the model. The use of random effects in linear models for normal responses is well established. By contrast, random effects have only recently seen much use in models for categorical data. This chapter surveys a variety of potential social science applications of random effects modeling of categorical data. Applications discussed include repeated measurement for binary or ordinal responses, shrinkage to improve multiparameter estimation of a set of proportions or rates, multivariate latent variable modeling, hierarchically structured modeling, and cluster sampling. The models discussed belong to the class of generalized linear mixed models {(GLMMs)}, an extension of ordinary linear models that permits non-normal response variables and both fixed and random effects in the predictor term. The models are {GLMMs} for either binomial or Poisson response variables, although we also present extensions to multicategory (nominal or ordinal) responses. 
We also summarize some of the technical issues of model-fitting that complicate the fitting of {GLMMs} even with existing software.}, journal = {Sociological Methodology}, author = {Agresti, Alan and Booth, James G. and Hobert, James P. and Caffo, Brian}, month = jan, year = {2000}, annote = {{ArticleType:} research-article / Full publication date: 2000 / Copyright © 2000 American Sociological Association}, pages = {27--80}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/MZZPQN4X/Agresti et al. - 2000 - Random-Effects Modeling of Categorical Response Da.pdf:application/pdf} }, @article{ramon_applications_1995, title = {Applications of the {EM} Algorithm to the Analysis of Life Length Data}, volume = {44}, issn = {0035-9254}, url = {http://www.jstor.org/stable/2986040}, doi = {10.2307/2986040}, abstract = {The parameters of the life length distribution of a given component are to be estimated. The observations on which inference is to be based are field data which are incomplete in some fashion. Thus, for example, the reported life length may include a period of unknown duration during which the component is not in use, the life length distribution may be affected by an unobserved environmental factor or the component may be part of a larger system, and failure mode analysis reveals only the module containing the failed component, not its identity. It is shown how the {EM} algorithm can be used to calculate the maximum likelihood estimates of the parameters of interest in these instances. The methodology is applied to some data on the life lengths of electronic components used in the telecommunications industry, yielding values that are similar to those obtained from complete observations on comparable components.}, number = {3}, journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)}, author = {Albert, Jose Ramon G. 
and Baxter, Laurence A.}, month = jan, year = {1995}, pages = {323--341}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RAQX97X5/Ramon et al. - 1995 - Applications of the EM Algorithm to the Analysis o.pdf:application/pdf} }, @article{schoenberg_latent_1985, title = {Latent Variables in the Analysis of Limited Dependent Variables}, volume = {15}, issn = {0081-1750}, url = {http://www.jstor.org/stable/270851}, doi = {10.2307/270851}, journal = {Sociological Methodology}, author = {Schoenberg, Ronald}, month = jan, year = {1985}, pages = {212--241}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/2GU3RTCF/Schoenberg - 1985 - Latent Variables in the Analysis of Limited Depend.pdf:application/pdf} }, @article{raudenbush_multilevel_1991, title = {A Multilevel, Multivariate Model for Studying School Climate with Estimation Via the {EM} Algorithm and Application to U. S. High-School Data}, volume = {16}, issn = {0362-9791}, url = {http://www.jstor.org/stable/1165105}, doi = {10.2307/1165105}, abstract = {In many studies of school climate, researchers ask teachers a series of questions, and the responses to related questions are averaged or summed to create a scale score for each teacher on each dimension of climate under investigation. Researchers have disagreed, however, about the analysis of such data: Some have utilized the teacher as the analytic unit, and some have utilized the school as the unit. In this article, we propose a three-level, multivariate statistical modeling strategy that resolves the unit-of-analysis dilemma and unifies thinking about the analysis in such studies. A reanalysis of U. S. high-school data illustrates how to estimate and interpret: (a) the level of interteacher agreement on each climate dimension; (b) the internal consistency of measurement at the teacher and school levels; and (c) the correlations among "true" climate scores at each level. 
A linear model analysis utilized teacher control over school and classroom policy and teacher morale as bivariate latent outcomes to be predicted by school-level variables (e. g., sector, size, composition) and by teacher-level variables (e. g., education, race, sex, subject matter). Implications for conceptualization, design, analysis, and interpretation in future studies of school climate are considered.}, number = {4}, journal = {Journal of Educational Statistics}, author = {Raudenbush, Stephen W. and Rowan, Brian and Kang, Sang Jin}, month = dec, year = {1991}, pages = {295--330}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/SWVXUKNC/Raudenbush et al. - 1991 - A Multilevel, Multivariate Model for Studying Scho.pdf:application/pdf} }, @article{vermunt_multilevel_2003, title = {Multilevel Latent Class Models}, volume = {33}, issn = {0081-1750}, url = {http://www.jstor.org/stable/1519857}, abstract = {The latent class {(LC)} models that have been developed so far assume that observations are independent. Parametric and non-parametric random-coefficient {LC} models are proposed here, which will make it possible to modify this assumption. For example, the models can be used for the analysis of data collected with complex sampling designs, data with a multilevel structure, and multiple-group data for more than a few groups. An adapted {EM} algorithm is presented that makes maximum-likelihood estimation feasible. 
The new model is illustrated with examples from organizational, educational, and cross-national comparative research.}, journal = {Sociological Methodology}, author = {Vermunt, Jeroen K.}, month = jan, year = {2003}, pages = {213--239}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/I9ITCTNZ/Vermunt - 2003 - Multilevel Latent Class Models.pdf:application/pdf} }, @article{skrondal_prediction_2009, title = {Prediction in Multilevel Generalized Linear Models}, volume = {172}, issn = {0964-1998}, url = {http://www.jstor.org/stable/20622529}, abstract = {We discuss prediction of random effects and of expected responses in multilevel generalized linear models. Prediction of random effects is useful for instance in small area estimation and disease mapping, effectiveness studies and model diagnostics. Prediction of expected responses is useful for planning, model interpretation and diagnostics. For prediction of random effects, we concentrate on empirical Bayes prediction and discuss three different kinds of standard errors; the posterior standard deviation and the marginal prediction error standard deviation (comparative standard errors) and the marginal sampling standard deviation (diagnostic standard error). Analytical expressions are available only for linear models and are provided in an appendix. For other multilevel generalized linear models we present approximations and suggest using parametric bootstrapping to obtain standard errors. We also discuss prediction of expectations of responses or probabilities for a new unit in a hypothetical cluster, or in a new (randomly sampled) cluster or in an existing cluster. The methods are implemented in gllamm and illustrated by applying them to survey data on reading proficiency of children nested in schools. 
Simulations are used to assess the performance of various predictions and associated standard errors for logistic random-intercept models under a range of conditions.}, number = {3}, journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)}, author = {Skrondal, Anders and Rabe-Hesketh, Sophia}, month = jun, year = {2009}, pages = {659--687} }, @article{lee_maximum_2001, title = {Maximum Likelihood Estimation of Two-Level Latent Variable Models with Mixed Continuous and Polytomous Data}, volume = {57}, issn = {0006-341X}, url = {http://www.jstor.org/stable/3068417}, abstract = {Two-level data with hierarchical structure and mixed continuous and polytomous data are very common in biomedical research. In this article, we propose a maximum likelihood approach for analyzing a latent variable model with these data. The maximum likelihood estimates are obtained by a Monte Carlo {EM} algorithm that involves the Gibbs sampler for approximating the E-step and the M-step and the bridge sampling for monitoring the convergence. 
The approach is illustrated by a two-level data set concerning the development and preliminary findings from an {AIDS} preventative intervention for Filipina commercial sex workers where the relationship between some latent quantities is investigated.}, number = {3}, journal = {Biometrics}, author = {Lee, Sik-Yum and Shi, Jian-Qing}, year = {2001}, pages = {787--794}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UNF7HWX9/Lee and Shi - 2001 - Maximum Likelihood Estimation of Two-Level Latent .pdf:application/pdf} }, @article{meng_em_1997, title = {The {EM} Algorithm--An Old Folk-Song Sung to a Fast New Tune}, volume = {59}, issn = {0035-9246}, url = {http://www.jstor.org/stable/2346009}, abstract = {Celebrating the 20th anniversary of the presentation of the paper by Dempster, Laird and Rubin which popularized the {EM} algorithm, we investigate, after a brief historical account, strategies that aim to make the {EM} algorithm converge faster while maintaining its simplicity and stability (e.g. automatic monotone convergence in likelihood). First we introduce the idea of a `working parameter' to facilitate the search for efficient data augmentation schemes and thus fast {EM} implementations. Second, summarizing various recent extensions of the {EM} algorithm, we formulate a general alternating expectation-conditional maximization algorithm {AECM} that couples flexible data augmentation schemes with model reduction schemes to achieve efficient computations. We illustrate these methods using multivariate t-models with known or unknown degrees of freedom and Poisson models for image reconstruction. We show, through both empirical and theoretical evidence, the potential for a dramatic reduction in computational time with little increase in human effort. We also discuss the intrinsic connection between {EM-type} algorithms and the Gibbs sampler, and the possibility of using the techniques presented here to speed up the latter. 
The main conclusion of the paper is that, with the help of statistical considerations, it is possible to construct algorithms that are simple, stable and fast.}, number = {3}, journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, author = {Meng, Xiao-Li and van Dyk, David}, month = jan, year = {1997}, pages = {511--567}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/BFF8WKH6/Meng and Dyk - 1997 - The EM Algorithm--An Old Folk-Song Sung to a Fast .pdf:application/pdf} }, @article{pendergast_survey_1996, title = {A Survey of Methods for Analyzing Clustered Binary Response Data}, volume = {64}, issn = {0306-7734}, url = {http://www.jstor.org/stable/1403425}, doi = {10.2307/1403425}, abstract = {A comprehensive survey of regression-type models for clusters of correlated binary outcomes, including longitudinal data, is presented. In particular, we focus on models which can accommodate both between- and within-cluster categorical and continuous covariates. Emphasis is given to motivation of the model specification, interrelationships among models, parameter testing and interpretation, estimation methods (including both likelihood and non-likelihood approaches), computational issues, availability of software and other implementation issues, and to the advantages and disadvantages of the various approaches. Models discussed include naïve and response feature models, conditionally specified models, marginal models, and cluster-specific models. Extensions to ordinal data and relationships to graphical representations of models are also discussed. /// Une étude exhaustive sur les problèmes de classification basées sur des modèles de régression appliqués à des données binaires corrélées (y inclus le cas de connées longitudinales) est présenté. 
En particulier, on se concentre sur les modèles pouvant incorporer à la fois des variables explicatives à l'intérieur des classes et entre les classes que ces variables soient catégorielles ou continues. Une emphase est mise sur les thèmes suivants: la motivation associées aux spécifications du modèle, les connexions parmi les modèles, les tests concernant les paramètres et leurs interprétations, les méthodes d'estimation (incluant les méthodes basées sur la vraisemblance et celles qui ne le sont pas), les problèmes reliés aux calculs, la disponibilité de logiciels, les avantages et les défauts de certaines approches. Les modèles discutés comprennent ceux où les réponses sont naïves et ceux dont les réponses sont prédeterminées, les modèles conditionnellement fixés, les modèles marginalement fixés, les modèles où les classes sont prédéterminées à l'avance. Des extensions aux données ordinales et des relations avec les représentations graphiques sont aussi discutées.}, number = {1}, journal = {International Statistical Review / Revue Internationale de Statistique}, author = {Pendergast, Jane F. and Gange, Stephen J. and Newton, Michael A. and Lindstrom, Mary J. and Palta, Mari and Fisher, Marian R.}, month = apr, year = {1996}, pages = {89--118}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JRVHNQ94/Pendergast et al. 
- 1996 - A Survey of Methods for Analyzing Clustered Binary.pdf:application/pdf} }, @article{goldstein_hierarchical_1995, title = {Hierarchical Data Modeling in the Social Sciences}, volume = {20}, issn = {1076-9986}, url = {http://www.jstor.org/stable/1165357}, doi = {10.2307/1165357}, number = {2}, journal = {Journal of Educational and Behavioral Statistics}, author = {Goldstein, Harvey}, month = jul, year = {1995}, pages = {201--204}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/N85RDHQS/Goldstein - 1995 - Hierarchical Data Modeling in the Social Sciences.pdf:application/pdf} }, @article{goldstein_restricted_1989, title = {Restricted Unbiased Iterative Generalized Least-Squares Estimation}, volume = {76}, issn = {0006-3444}, url = {http://www.jstor.org/stable/2336130}, doi = {10.2307/2336130}, abstract = {It is shown that the iterative least-squares procedure for estimating the parameters in a general multilevel random coefficients linear model can be modified to produce unbiased estimates of the random parameters. In the multivariate normal case these are equivalent to restricted maximum likelihood estimates.}, number = {3}, journal = {Biometrika}, author = {Goldstein, Harvey}, year = {1989}, pages = {622--623}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/D4U9CC82/Goldstein - 1989 - Restricted Unbiased Iterative Generalized Least-Sq.pdf:application/pdf} }, @article{goldstein_multilevel_1991, title = {Multilevel Modelling of Survey Data}, volume = {40}, issn = {0039-0526}, url = {http://www.jstor.org/stable/2348496}, doi = {10.2307/2348496}, number = {2}, journal = {Journal of the Royal Statistical Society. 
Series D (The Statistician)}, author = {Goldstein, Harvey}, month = jan, year = {1991}, pages = {235--244}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/87NFSZFQ/Goldstein - 1991 - Multilevel Modelling of Survey Data.pdf:application/pdf} }, @article{paterson_new_1991, title = {New Statistical Methods for Analysing Social Structures: An Introduction to Multilevel Models}, volume = {17}, issn = {0141-1926}, shorttitle = {New Statistical Methods for Analysing Social Structures}, url = {http://www.jstor.org/stable/1500648}, abstract = {An introductory account is given of developments in multilevel modelling of educational and other social data. The technique is introduced with some simple examples and its importance is explained. Examples of applications in a number of areas are given, including repeated measures designs, school effectiveness studies, area-based studies and political opinion sample surveys. Almost all data collected in the social sciences have some form of inherent hierarchical structure, and this structure should be reflected in the statistical models that are used to analyse them. 
It is suggested that multilevel techniques and associated software packages have reached the stage when they can and should be applied routinely in the analysis of social data, and that failure to do so can result in potentially serious misinterpretations.}, number = {4}, journal = {British Educational Research Journal}, author = {Paterson, Lindsay and Goldstein, Harvey}, month = jan, year = {1991}, pages = {387--393}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/GG83DFCP/Paterson and Goldstein - 1991 - New Statistical Methods for Analysing Social Struc.pdf:application/pdf} }, @article{jon_rasbash_efficient_1994, title = {Efficient Analysis of Mixed Hierarchical and Cross-Classified Random Structures Using a Multilevel Model}, volume = {19}, issn = {1076-9986}, url = {http://www.jstor.org/stable/1165397}, doi = {10.2307/1165397}, abstract = {An efficient and straightforward procedure is described for specifying and estimating parameters of general mixed models which contain both hierarchical and crossed random factors. This is done using a model formulated for purely hierarchically structured data and generalizes the results of Raudenbush (1993). 
The exposition is for the continuous response linear model with natural extensions to generalized linear, nonlinear, and multivariate models.}, number = {4}, journal = {Journal of Educational and Behavioral Statistics}, author = {Rasbash, Jon and Goldstein, Harvey}, month = dec, year = {1994}, pages = {337--350}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/5XF963DD/Jon Rasbash and Goldstein - 1994 - Efficient Analysis of Mixed Hierarchical and Cross.pdf:application/pdf} }, @article{yang_multilevel_2000, title = {Multilevel Models for Repeated Binary Outcomes: Attitudes and Voting over the Electoral Cycle}, volume = {163}, issn = {0964-1998}, shorttitle = {Multilevel Models for Repeated Binary Outcomes}, url = {http://www.jstor.org/stable/2680508}, abstract = {Models for fitting longitudinal binary responses are explored by using a panel study of voting intentions. A standard multilevel repeated measures logistic model is shown to be inadequate owing to a substantial proportion of respondents who maintain a constant response over time. A multivariate binary response model is shown to be a better fit to the data.}, number = {1}, journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)}, author = {Yang, Min and Goldstein, Harvey and Heath, Anthony}, month = jan, year = {2000}, annote = {{ArticleType:} research-article / Full publication date: 2000 / Copyright © 2000 Royal Statistical Society}, pages = {49--62}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/I56ISGMW/Yang et al. 
- 2000 - Multilevel Models for Repeated Binary Outcomes At.pdf:application/pdf} }, @article{goldstein_multilevel_2007, title = {Multilevel Structural Equation Models for the Analysis of Comparative Data on Educational Performance}, volume = {32}, issn = {1076-9986}, url = {http://www.jstor.org/stable/20172084}, abstract = {The Programme for International Student Assessment comparative study of reading performance among 15-year-olds is reanalyzed using statistical procedures that allow the full complexity of the data structures to be explored. The article extends existing multilevel factor analysis and structural equation models and shows how this can extract richer information from the data and provide better fits to the data. It shows how these models can be used fully to explore the dimensionality of the data and to provide efficient, single-stage models that avoid the need for multiple imputation procedures. Markov Chain Monte Carlo methodology for parameter estimation is described.}, number = {3}, journal = {Journal of Educational and Behavioral Statistics}, author = {Goldstein, Harvey and Bonnet, Gérard and Rocher, Thierry}, year = {2007}, pages = {252--286}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QMKPMZ65/Goldstein et al. - 2007 - Multilevel Structural Equation Models for the Anal.pdf:application/pdf} }, @article{yang_multivariate_2002, title = {Multivariate Multilevel Analyses of Examination Results}, volume = {165}, issn = {0964-1998}, url = {http://www.jstor.org/stable/3559765}, abstract = {In the study of examination results much interest centres on comparisons of curriculum subjects entered and the correlation between these at individual and institution level based on data where not every individual takes all subjects. Such 'missing' data are not missing at random because individuals deliberately select subjects that they wish to study according to criteria that will be associated with their performance. 
In this paper we propose multivariate multilevel models for the analysis of such data, adjusting for such subject selection effects as well as for prior achievement. This then enables more appropriate institutional comparisons and correlation estimates. We analyse A- and {AS-level} results in different mathematics papers of 52 587 students from 2592 institutions in England in 1997. Although this paper is concerned largely with methodology, substantive findings emerge on the effects of gender, age, intakes of General Certificate of Education pupils, examination board and establishment type for A- and {AS-level} mathematics.}, number = {1}, journal = {Journal of the Royal Statistical Society. Series A (Statistics in Society)}, author = {Yang, Min and Goldstein, Harvey and Browne, William and Woodhouse, Geoffrey}, month = jan, year = {2002}, annote = {{ArticleType:} research-article / Full publication date: 2002 / Copyright © 2002 Royal Statistical Society}, pages = {137--153}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/FFXUPZPQ/Yang et al. - 2002 - Multivariate Multilevel Analyses of Examination Re.pdf:application/pdf} }, @article{goldstein_multilevel_2000, title = {Multilevel Models in the Study of Dynamic Household Structures}, volume = {16}, issn = {0168-6577}, url = {http://www.jstor.org/stable/20164120}, abstract = {A modelling procedure is proposed for complex, dynamic household data structures where households change composition over time. Multilevel multiple membership models are presented for such data and their application is discussed with an example. /// Une procédure de modélisation est proposée pour des données dynamiques sur la structure des ménages dont la composition change au cours du temps. 
Des modèles multiniveaux à adhésion multiple sont présentés pour de telles données et leur application est discutée sur un exemple précis.}, number = {4}, journal = {European Journal of Population / Revue Européenne de Démographie}, author = {Goldstein, Harvey and Rasbash, Jon and Browne, William and Woodhouse, Geoffrey and Poulain, Michel}, month = dec, year = {2000}, pages = {373--387}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XBHJ982G/Goldstein et al. - 2000 - Multilevel Models in the Study of Dynamic Househol.pdf:application/pdf} }, @article{langford_multilevel_1999, title = {Multilevel Modelling of the Geographical Distributions of Diseases}, volume = {48}, issn = {0035-9254}, url = {http://www.jstor.org/stable/2680801}, abstract = {Multilevel modelling is used on problems arising from the analysis of spatially distributed health data. We use three applications to demonstrate the use of multilevel modelling in this area. The first concerns small area all-cause mortality rates from Glasgow where spatial autocorrelation between residuals is examined. The second analysis is of prostate cancer cases in Scottish counties where we use a range of models to examine whether the incidence is higher in more rural areas. The third develops a multiple-cause model in which deaths from cancer and cardiovascular disease in Glasgow are examined simultaneously in a spatial model. We discuss some of the issues surrounding the use of complex spatial models and the potential for future developments.}, number = {2}, journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)}, author = {Langford, Ian H. and Leyland, Alastair H. and Rasbash, Jon and Goldstein, Harvey}, month = jan, year = {1999}, pages = {253--268}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XMJNBCHK/Langford et al. 
- 1999 - Multilevel Modelling of the Geographical Distribut.pdf:application/pdf} }, @article{goldstein_modelling_2003, title = {Modelling Social Segregation}, volume = {29}, issn = {0305-4985}, url = {http://www.jstor.org/stable/1050612}, abstract = {This paper proposes a multilevel modelling approach to the analysis of social segregation in schools. Using data on free school meal eligibility it shows that the underlying variation between schools for the period 1994-1999 has increased. It also shows that the change is greater for selective than non-selective local education authorities {(LEAs).} It is suggested that the approach of this paper can be applied generally to the modelling of social segregation at institution level.}, number = {2}, journal = {Oxford Review of Education}, author = {Goldstein, Harvey and Noden, Philip}, month = jun, year = {2003}, pages = {225--237}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/BKAFGTTW/Goldstein and Noden - 2003 - Modelling Social Segregation.pdf:application/pdf} }, @article{goldstein_multilevel_2009, title = {Multilevel Multivariate Modelling of Childhood Growth, Numbers of Growth Measurements and Adult Characteristics}, volume = {172}, issn = {0964-1998}, url = {http://www.jstor.org/stable/20622526}, abstract = {A general latent normal model for multilevel data with mixtures of response types is extended in the case of ordered responses to deal with variates having a large number of categories and including count data. An example is analysed by using repeated measures data on child growth and adult measures of body mass index and glucose. Applications are described that are concerned with the flexible prediction of adult measurements from collections of growth measurements and for studying the relationship between the number of measurement occasions and growth trajectories.}, number = {3}, journal = {Journal of the Royal Statistical Society. 
Series A (Statistics in Society)}, author = {Goldstein, Harvey and Kounali, Daphne}, month = jun, year = {2009}, annote = {{ArticleType:} research-article / Issue Title: Recent Advances in Multilevel Modelling Methodology and Applications / Full publication date: Jun., 2009 / Copyright © 2009 Royal Statistical Society}, pages = {599--613} }, @article{gill_regression_1988, title = {Regression Analysis for Incomplete Mixed Cross-Section and Time-Series Data by a Modified {EM} Algorithm}, volume = {50}, issn = {0581-5738}, url = {http://www.jstor.org/stable/25052526}, abstract = {An iterative method is proposed for estimating a certain regression model with mixed cross-section and time-series data, where each observational unit is not necessarily available at each time point of the time series. We give theorems on consistency and asymptotic normality of estimators of the regression coefficients as the size of the cross-section increases while the length of the time series remains bounded. We discuss the connection between our method and the {EM} algorithm.}, number = {1}, journal = {Sankhyā: The Indian Journal of Statistics, Series B (1960-2002)}, author = {Gill, Richard D.}, month = apr, year = {1988}, pages = {95--102}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/M6HUFEX8/Gill - 1988 - Regression Analysis for Incomplete Mixed Cross-Sec.pdf:application/pdf} }, @article{walker_em_1996, title = {An {EM} Algorithm for Nonlinear Random Effects Models}, volume = {52}, issn = {0006-341X}, url = {http://www.jstor.org/stable/2533054}, doi = {10.2307/2533054}, abstract = {An {EM} algorithm for exact maximum likelihood estimation of a class of nonlinear random effects models is given. The M-steps are shown to be analytically tractable and the E-steps are evaluated using Monte Carlo integration. 
The algorithm depends on the specification of the missing data which is taken to be the random effects.}, number = {3}, journal = {Biometrics}, author = {Walker, Stephen}, year = {1996}, pages = {934--944}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WGIMW7SR/Walker - 1996 - An EM Algorithm for Nonlinear Random Effects Model.pdf:application/pdf} }, @article{vonesh_generalized_2001, title = {Generalized Least Squares, Taylor Series Linearization, and Fisher's Scoring in Multivariate Nonlinear Regression}, volume = {96}, issn = {0162-1459}, url = {http://www.jstor.org/stable/2670366}, abstract = {In this article, we consider a general multivariate nonlinear regression setting in which the marginal mean and variance-covariance structure share a common set of regression parameters. Estimation is carried out via iteratively reweighted generalized least squares {(IRGLS)} that entails repeated application of Taylor series linearization and estimated generalized least squares {(EGLS).} Under normality, this {IRGLS} procedure is equivalent to Fisher's method of scoring and hence maximum likelihood estimation {(MLE).} However, estimates from this procedure are also shown to minimize a bias-corrected generalized least squares objective function that does not require the assumption of normality. Under fairly mild regularity conditions, the resulting estimates are consistent, asymptotically normal, and-under normality assumptions-asymptotically efficient. The estimates are compared against those obtained as solutions to the usual generalized estimating equations {(GEE)} using both simulation and numerical examples.}, number = {453}, journal = {Journal of the American Statistical Association}, author = {Vonesh, Edward F. and Wang, Hao and Majumdar, Dibyen}, month = mar, year = {2001}, pages = {282--291}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/N2ZA4T35/Vonesh et al. 
- 2001 - Generalized Least Squares, Taylor Series Lineariza.pdf:application/pdf} }, @article{zeger_generalized_1991, title = {Generalized Linear Models With Random Effects; A Gibbs Sampling Approach}, volume = {86}, issn = {0162-1459}, url = {http://www.jstor.org/stable/2289717}, doi = {10.2307/2289717}, abstract = {Generalized linear models have unified the approach to regression for a wide variety of discrete, continuous, and censored response variables that can be assumed to be independent across experimental units. In applications such as longitudinal studies, genetic studies of families, and survey sampling, observations may be obtained in clusters. Responses from the same cluster cannot be assumed to be independent. With linear models, correlation has been effectively modeled by assuming there are cluster-specific random effects that derive from an underlying mixing distribution. Extensions of generalized linear models to include random effects has, thus far, been hampered by the need for numerical integration to evaluate likelihoods. In this article, we cast the generalized linear random effects model in a Bayesian framework and use a Monte Carlo method, the Gibbs sampler, to overcome the current computational limitations. The resulting algorithm is flexible to easily accommodate changes in the number of random effects and in their assumed distribution when warranted. The methodology is illustrated through a simulation study and an analysis of infectious disease data.}, number = {413}, journal = {Journal of the American Statistical Association}, author = {Zeger, Scott L. and Karim, M. 
Rezaul}, month = mar, year = {1991}, pages = {79--86}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/PAFIJ22Z/Zeger and Karim - 1991 - Generalized Linear Models With Random Effects; A G.pdf:application/pdf} }, @article{pinheiro_approximations_1995, title = {Approximations to the Log-Likelihood Function in the Nonlinear Mixed-Effects Model}, volume = {4}, issn = {1061-8600}, url = {http://www.jstor.org/stable/1390625}, doi = {10.2307/1390625}, abstract = {Nonlinear mixed-effects models have received a great deal of attention in the statistical literature in recent years because of the flexibility they offer in handling the unbalanced repeated-measures data that arise in different areas of investigation, such as pharmacokinetics and economics. Several different methods for estimating the parameters in nonlinear mixed-effects model have been proposed. We concentrate here on two of them--maximum likelihood and restricted maximum likelihood. A rather complex numerical issue for (restricted) maximum likelihood estimation in nonlinear mixed-effects models is the evaluation of the log-likelihood function of the data, because it involves the evaluation of a multiple integral that, in most cases, does not have a closed-form expression. We consider here four different approximations to the log-likelihood, comparing their computational and statistical properties. We conclude that the linear mixed-effects {(LME)} approximation suggested by Lindstrom and Bates, the Laplacian approximation, and Gaussian quadrature centered at the conditional modes of the random effects are quite accurate and computationally efficient. Gaussian quadrature centered at the expected value of the random effects is quite inaccurate for a smaller number of abscissas and computationally inefficient for a larger number of abscissas. 
Importance sampling is accurate, but quite inefficient computationally.}, number = {1}, journal = {Journal of Computational and Graphical Statistics}, author = {Pinheiro, José C. and Bates, Douglas M.}, month = mar, year = {1995}, pages = {12--35}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/PIFTQNAC/Pinheiro and Bates - 1995 - Approximations to the Log-Likelihood Function in t.pdf:application/pdf} }, @article{dyk_art_2001, title = {The Art of Data Augmentation}, volume = {10}, issn = {1061-8600}, url = {http://www.jstor.org/stable/1391021}, abstract = {The term data augmentation refers to methods for constructing iterative optimization or sampling algorithms via the introduction of unobserved data or latent variables. For deterministic algorithms, the method was popularized in the general statistical community by the seminal article by Dempster, Laird, and Rubin on the {EM} algorithm for maximizing a likelihood function or, more generally, a posterior density. For stochastic algorithms, the method was popularized in the statistical literature by Tanner and Wong's Data Augmentation algorithm for posterior sampling and in the physics literature by Swendsen and Wang's algorithm for sampling from the Ising and Potts models and their generalizations; in the physics literature, the method of data augmentation is referred to as the method of auxiliary variables. Data augmentation schemes were used by Tanner and Wong to make simulation feasible and simple, while auxiliary variables were adopted by Swendsen and Wang to improve the speed of iterative simulation. In general, however, constructing data augmentation schemes that result in both simple and fast algorithms is a matter of art in that successful strategies vary greatly with the (observed-data) models being considered. 
After an overview of data augmentation/auxiliary variables and some recent developments in methods for constructing such efficient data augmentation schemes, we introduce an effective search strategy that combines the ideas of marginal augmentation and conditional augmentation, together with a deterministic approximation method for selecting good augmentation schemes. We then apply this strategy to three common classes of models (specifically, multivariate t, probit regression, and mixed-effects models) to obtain efficient Markov chain Monte Carlo algorithms for posterior sampling. We provide theoretical and empirical evidence that the resulting algorithms, while requiring similar programming effort, can show dramatic improvement over the Gibbs samplers commonly used for these models in practice. A key feature of all these new algorithms is that they are positive recurrent subchains of nonpositive recurrent Markov chains constructed in larger spaces.}, number = {1}, journal = {Journal of Computational and Graphical Statistics}, author = {van Dyk, David A. and Meng, Xiao-Li}, month = mar, year = {2001}, note = {{ArticleType:} research-article / Full publication date: Mar., 2001 / Copyright © 2001 American Statistical Association, Institute of Mathematical Statistics and Interface Foundation of America}, pages = {1--50}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/8V6S2U7V/Dyk and Meng - 2001 - The Art of Data Augmentation.pdf:application/pdf} }, @article{lindstrom_correction_1994, title = {Correction to Lindstrom and Bates (1988): Newton-Raphson and {EM} Algorithms for Linear Mixed Effects Models for Repeated Measures Data}, volume = {89}, issn = {0162-1459}, shorttitle = {Correction to Lindstrom and Bates (1988)}, url = {http://www.jstor.org/stable/2291042}, doi = {10.2307/2291042}, number = {428}, journal = {Journal of the American Statistical Association}, author = {Lindstrom, Mary J. 
and Bates, Douglas M.}, month = dec, year = {1994}, pages = {1572}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/44CV6IMS/Lindstrom and Bates - 1994 - Correction to Lindstrom and Bates (1988) Newton-R.pdf:application/pdf} }, @article{lindstrom_newton-raphson_1988, title = {Newton-Raphson and {EM} Algorithms for Linear Mixed-Effects Models for Repeated-Measures Data}, volume = {83}, issn = {0162-1459}, url = {http://www.jstor.org/stable/2290128}, doi = {10.2307/2290128}, abstract = {We develop an efficient and effective implementation of the Newton-Raphson {(NR)} algorithm for estimating the parameters in mixed-effects models for repeated-measures data. We formulate the derivatives for both maximum likelihood and restricted maximum likelihood estimation and propose improvements to the algorithm discussed by Jennrich and Schluchter (1986) to speed convergence and ensure a positive-definite covariance matrix for the random effects at each iteration. We use matrix decompositions to develop efficient and computationally stable implementations of both the {NR} algorithm and an {EM} algorithm {(Laird} and Ware 1982) for this model. We compare the two methods {(EM} vs. {NR)} in terms of computational order and performance on two sample data sets and conclude that in most situations a well-implemented {NR} algorithm is preferable to the {EM} algorithm or {EM} algorithm with Aitken's acceleration. The term repeated measures refers to experimental designs where there are several individuals and several measurements taken on each individual. In the mixed-effects model each individual's vector of responses is modeled as a parametric function, where some of the parameters or "effects" are random variables with a multivariate normal distribution. 
This model has been successful because it can handle unbalanced data (different designs for different individuals), missing data (observations on all individuals are taken at the same design points, but some individuals have missing data), and jointly dependent random effects. The price for this flexibility is that the parameter estimates may be difficult to compute. We propose some new methods for implementing the {EM} and {NR} algorithms and draw conclusions about their performance. We also discuss extensions of the mixed-effects model to incorporate nonindependent conditional error structure and nested-type designs.}, number = {404}, journal = {Journal of the American Statistical Association}, author = {Lindstrom, Mary J. and Bates, Douglas M.}, month = dec, year = {1988}, pages = {1014--1022}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/72B4G6M9/Lindstrom and Bates - 1988 - Newton-Raphson and EM Algorithms for Linear Mixed-.pdf:application/pdf} }, @article{lindstrom_nonlinear_1990, title = {Nonlinear Mixed Effects Models for Repeated Measures Data}, volume = {46}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/2532087}, doi = {10.2307/2532087}, abstract = {We propose a general, nonlinear mixed effects model for repeated measures data and define estimators for its parameters. The proposed estimators are a natural combination of least squares estimators for nonlinear fixed effects models and maximum likelihood (or restricted maximum likelihood) estimators for linear mixed effects models. We implement Newton-Raphson estimation using previously developed computational methods for nonlinear fixed effects models and for linear mixed effects models. Two examples are presented and the connections between this work and recent work on generalized linear mixed effects models are discussed.}, number = {3}, journal = {Biometrics}, author = {Lindstrom, Mary J. 
and Bates, Douglas M.}, year = {1990}, pages = {673--687}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/G8CKNUIE/Lindstrom and Bates - 1990 - Nonlinear Mixed Effects Models for Repeated Measur.pdf:application/pdf} }, @article{bates_[semiparametric_2001, title = {{[Semiparametric} Nonlinear Mixed-Effects Models and Their Applications]: Comment}, volume = {96}, issn = {0162-1459}, shorttitle = {{[Semiparametric} Nonlinear Mixed-Effects Models and Their Applications]}, url = {http://www.jstor.org/stable/3085899}, number = {456}, journal = {Journal of the American Statistical Association}, author = {Bates, Douglas M. and Lindstrom, Mary J. and Wahba, Grace}, month = dec, year = {2001}, pages = {1292--1293}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/53X7AWF6/Bates et al. - 2001 - [Semiparametric Nonlinear Mixed-Effects Models and.pdf:application/pdf} }, @article{li_new_2007, title = {A New Estimation Procedure for a Partially Nonlinear Model via a Mixed-Effects Approach}, volume = {35}, issn = {0319-5724}, url = {http://www.jstor.org/stable/20445264}, abstract = {The authors consider the estimation of the parametric component of a partially nonlinear semiparametric regression model whose nonparametric component is viewed as a nuisance parameter. They show how estimation can proceed through a nonlinear mixed-effects model approach. They prove that under certain regularity conditions, the proposed estimate is consistent and asymptotically Gaussian. They investigate its finite-sample properties through simulations and illustrate its use with data on the relation between the photosynthetically active radiation and the net ecosystem-atmosphere exchange of carbon dioxide. /// Les auteurs s'intéressent à l'estimation de la partie paramétrique d'un modèle de régression semiparamétrique partiellement non linéaire dont la composante non paramétrique est considérée comme nuisible. 
Ils montrent comment l'estimation est possible au moyen d'un modèle non linéaire à effets mixtes. Ils démontrent que sous certaines conditions de régularité, l'estimateur proposé est convergent et asymptotiquement gaussien. Ils en étudient le comportement à taille finie au moyen de simulations et en illustrent l'emploi à l'aide de données concernant le rayonnement absorbé par photosynthèse en relation avec le bilan des échanges en bioxyde de carbone entre l'atmosphère et l'écosystème.}, number = {3}, journal = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique}, author = {Li, Runze and Nie, Lei}, year = {2007}, pages = {399--411}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/4ZC3T6Q7/Li and Nie - 2007 - A New Estimation Procedure for a Partially Nonline.pdf:application/pdf} }, @article{lombardia_semiparametric_2008, title = {Semiparametric Inference in Generalized Mixed Effects Models}, volume = {70}, issn = {1369-7412}, url = {http://www.jstor.org/stable/20203863}, abstract = {The paper presents a study of the generalized partially linear model including random effects in its linear part. We propose an estimator that combines likelihood approaches for mixed effects models, with kernel methods. Following the methodology of Härdle and co-workers, we introduce a test for the hypothesis of a parametric mixed effects model against the alternative of a semiparametric mixed effects model. The critical values are estimated by using a bootstrap procedure. The asymptotic theory for the methods is provided, as are the results of a simulation study. These verify the feasibility and the excellent behaviour of the methods for samples of even moderate size. The usefulness of the methodology is illustrated with an application in which the objective is to estimate forest coverage in Galicia, Spain.}, number = {5}, journal = {Journal of the Royal Statistical Society. 
Series B {(Statistical} Methodology)}, author = {Lombardía, María José and Sperlich, Stefan}, month = nov, year = {2008}, pages = {913--930} }, @article{liu_simultaneous_2007, title = {Simultaneous Inference for Semiparametric Nonlinear Mixed-Effects Models with Covariate Measurement Errors and Missing Responses}, volume = {63}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/4541344}, abstract = {Semiparametric nonlinear mixed-effects {(NLME)} models are flexible for modeling complex longitudinal data. Covariates are usually introduced in the models to partially explain interindividual variations. Some covariates, however, may be measured with substantial errors. Moreover, the responses may be missing and the missingness may be nonignorable. We propose two approximate likelihood methods for semiparametric {NLME} models with covariate measurement errors and nonignorable missing responses. The methods are illustrated in a real data example. Simulation results show that both methods perform well and are much better than the commonly used naive method. /// Les modèles semi-paramétriques mixtes non linéaires {(NLME}, {≪NonLinear} Mixed-Effects≫) offrent une grande souplesse dans l'étude de données longitudinales avec des structures complexes. Les covariables sont habituellement introduites dans ces modèles afin d'expliquer une partie des variations inter-individuelles, Toutefois certaines covariables sont mesurées avec des erreurs conséquentes. Par ailleurs les réponses peuvent être manquantes avec une censure informative. Nous proposons deux méthodes basées sur la vraisemblance pour des modèles semi-paramétriques {NLME} en présence d'erreurs de mesures sur des covariables et des données manquantes de type ≪non ignorables≫. Les méthodes sont illustrées à partir d'un jeu réel de données. 
Les résultats obtenus par simulation mettent en évidence de bonnes performances des deux méthodes, bien meilleures que la méthode ≪naïve≫ communément utilisée.}, number = {2}, journal = {Biometrics}, author = {Liu, Wei and Wu, Lang}, month = jun, year = {2007}, pages = {342--350} }, @article{zhao_general_2006, title = {General Design Bayesian Generalized Linear Mixed Models}, volume = {21}, issn = {0883-4237}, url = {http://www.jstor.org/stable/27645735}, abstract = {Linear mixed models are able to handle an extraordinary range of complications in regression-type analyses. Their most common use is to account for within-subject correlation in longitudinal data analysis. They are also the standard vehicle for smoothing spatial count data. However, when treated in full generality, mixed models can also handle spline-type smoothing and closely approximate kriging. This allows for nonparametric regression models (e.g., additive models and varying coefficient models) to be handled within the mixed model framework. The key is to allow the random effects design matrix to have general structure; hence our label general design. For continuous response data, particularly when Gaussianity of the response is reasonably assumed, computation is now quite mature and supported by the R, {SAS} and {S—PLUS} packages. Such is not the case for binary and count responses, where generalized linear mixed models {(GLMMs)} are required, but are hindered by the presence of intractable multivariate integrals. Software known to us supports special cases of the {GLMM} (e.g., {PROC} {NLMIXED} in {SAS} or {glmmML} in R) or relies on the sometimes crude Laplace-type approximation of integrals (e.g., the {SAS} macro glimmix or {glmmPQL} in R). This paper describes the fitting of general design generalized linear mixed models. A Bayesian approach is taken and Markov chain Monte Carlo {(MCMC)} is used for estimation and inference. 
In this generalized setting, {MCMC} requires sampling from nonstandard distributions. In this article, we demonstrate that the {MCMC} package {WinBUGS} facilitates sound fitting of general design Bayesian generalized linear mixed models in practice.}, number = {1}, journal = {Statistical Science}, author = {Zhao, Y. and Staudenmayer, J. and Coull, B. A. and Wand, M. P.}, month = feb, year = {2006}, pages = {35--51}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/HT3ZRMXU/Zhao et al. - 2006 - General Design Bayesian Generalized Linear Mixed M.pdf:application/pdf} }, @article{rigby_generalized_2005, title = {Generalized Additive Models for Location, Scale and Shape}, volume = {54}, issn = {0035-9254}, url = {http://www.jstor.org/stable/3592732}, abstract = {A general class of statistical models for a univariate response variable is presented which we call the generalized additive model for location, scale and shape {(GAMLSS).} The model assumes independent observations of the response variable y given the parameters, the explanatory variables and the values of the random effects. The distribution for the response variable in the {GAMLSS} can be selected from a very general family of distributions including highly skew or kurtotic continuous and discrete distributions. The systematic part of the model is expanded to allow modelling not only of the mean (or location) but also of the other parameters of the distribution of y, as parametric and/or additive nonparametric (smooth) functions of explanatory variables and/or random-effects terms. Maximum (penalized) likelihood estimation is used to fit the (non)parametric models. A Newton-Raphson or Fisher scoring algorithm is used to maximize the (penalized) likelihood. The additive terms in the model are fitted by using a backfitting algorithm. Censored data are easily incorporated into the framework. 
Five data sets from different fields of application are analysed to emphasize the generality of the {GAMLSS} class of models.}, number = {3}, journal = {Journal of the Royal Statistical Society. Series C {(Applied} Statistics)}, author = {Rigby, R. A. and Stasinopoulos, D. M.}, month = jan, year = {2005}, pages = {507--554}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/RECEPTRR/Rigby and Stasinopoulos - 2005 - Generalized Additive Models for Location, Scale an.pdf:application/pdf} }, @article{bolfarine_influence_2007, title = {Influence Diagnostics for Skew-Normal Linear Mixed Models}, volume = {69}, issn = {0972-7671}, url = {http://www.jstor.org/stable/25664584}, abstract = {Normality (symmetry) of the random effects is a routine assumption in linear mixed models but it may, sometimes, be unrealistic, obscuring important features of among-subjects variation. We relax this assumption by assuming that the random effects density is skew-normal, considered as an extension of the univariate version proposed by Sahu, Dey and Branco {(CJS}, 2003). Following Zhu and Lee {(JRSSB}, 2001), we implement an {EM-type} algorithm to parameter estimation and then using the related conditional expectation of the complete-data log-likelihood function, develop diagnostic measures for implementing the local influence approach under four model perturbation schemes. Results obtained from simulated and real data sets are reported illustrating the usefulness of the approach.}, number = {4}, journal = {Sankhyā: The Indian Journal of Statistics (2003-2007)}, author = {Bolfarine, Heleno and Montenegro, Lourdes C. and Lachos, Victor H.}, month = nov, year = {2007}, pages = {648--670}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/UXETFGT7/Bolfarine et al. 
- 2007 - Influence Diagnostics for Skew-Normal Linear Mixed.pdf:application/pdf} }, @article{mcculloch_generalized_2003, title = {Generalized Linear Mixed Models}, volume = {7}, issn = {1935-5920}, url = {http://www.jstor.org/stable/4153190}, journal = {{NSF-CBMS} Regional Conference Series in Probability and Statistics}, author = {{McCulloch}, Charles E.}, month = jan, year = {2003}, pages = {i--84}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/TUQWN8TG/McCulloch - 2003 - Generalized Linear Mixed Models.pdf:application/pdf} }, @article{coull_crossed_2001, title = {Crossed Random Effect Models for Multiple Outcomes in a Study of Teratogenesis}, volume = {96}, issn = {0162-1459}, url = {http://www.jstor.org/stable/3085882}, abstract = {Human teratogens often manifest themselves through a broad spectrum of adverse effects. Although often not serious when considered individually, such outcomes taken together may represent a syndrome that can lead to serious developmental problems. Accordingly, studies that investigate the effect of human teratogens on fetal development typically record the presence or absence of a multitude of abnormalities, resulting in the data of multivariate binary form for each infant. Such studies typically have three objectives: (1) estimate an overall effect of exposure across outcomes, (2) identify subjects having the syndrome, and (3) identify those outcomes that constitute the syndrome so that doctors know what to look for when diagnosing the syndrome in other exposed newborns. This article proposes the use of a logistic regression model with crossed random effect structure to address all three questions simultaneously. We use the proposed models to analyze data from a study investigating the effects of in utero antiepileptic drug exposure on fetal development.}, number = {456}, journal = {Journal of the American Statistical Association}, author = {Coull, Brent A. and Hobert, James P. and Ryan, Louise M. 
and Holmes, Lewis B.}, month = dec, year = {2001}, pages = {1194--1204}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QH6NU944/Coull et al. - 2001 - Crossed Random Effect Models for Multiple Outcomes.pdf:application/pdf} }, @article{lee_hierarchical_1996, title = {Hierarchical Generalized Linear Models}, volume = {58}, issn = {0035-9246}, url = {http://www.jstor.org/stable/2346105}, abstract = {We consider hierarchical generalized linear models which allow extra error components in the linear predictors of generalized linear models. The distribution of these components is not restricted to be normal; this allows a broader class of models, which includes generalized linear mixed models. We use a generalization of Henderson's joint likelihood, called a hierarchical or h-likelihood, for inferences from hierarchical generalized linear models. This avoids the integration that is necessary when marginal likelihood is used. Under appropriate conditions maximizing the h-likelihood gives fixed effect estimators that are asymptotically equivalent to those obtained from the use of marginal likelihood; at the same time we obtain the random effect estimates that are asymptotically best unbiased predictors. An adjusted profile h-likelihood is shown to give the required generalization of restricted maximum likelihood for the estimation of dispersion components. A scaled deviance test for the goodness of fit, a model selection criterion for choosing between various dispersion models and a graphical method for checking the distributional assumption of random effects are proposed. The ideas of quasi-likelihood and extended quasi-likelihood are generalized to the new class. We give examples of the Poisson-gamma, binomial-beta and gamma-inverse gamma hierarchical generalized linear models. A resolution is proposed for the apparent difference between population-averaged and subject-specific models. 
A unified framework is provided for viewing and extending many existing methods.}, number = {4}, journal = {Journal of the Royal Statistical Society. Series B {(Methodological)}}, author = {Lee, Y. and Nelder, J. A.}, month = jan, year = {1996}, pages = {619--678}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/VSUHIJ5K/Lee and Nelder - 1996 - Hierarchical Generalized Linear Models.pdf:application/pdf} }, @article{lin_semiparametric_2006, title = {Semiparametric Estimation in General Repeated Measures Problems}, volume = {68}, issn = {1369-7412}, url = {http://www.jstor.org/stable/3647557}, abstract = {The paper considers a wide class of semiparametric problems with a parametric part for some covariate effects and repeated evaluations of a nonparametric function. Special cases in our approach include marginal models for longitudinal or clustered data, conditional logistic regression for matched case-control studies, multivariate measurement error models, generalized linear mixed models with a semiparametric component, and many others. We propose profile kernel and backfitting estimation methods for these problems, derive their asymptotic distributions and show that in likelihood problems the methods are semiparametric efficient. Although generally not true, it transpires that with our methods profiling and backfitting are asymptotically equivalent. We also consider pseudolikelihood methods where some nuisance parameters are estimated from a different algorithm. The methods proposed are evaluated by using simulation studies and applied to the Kenya haemoglobin data.}, number = {1}, journal = {Journal of the Royal Statistical Society. 
Series B {(Statistical} Methodology)}, author = {Lin, Xihong and Carroll, Raymond J.}, month = jan, year = {2006}, pages = {69--88}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/VQ9DGFDX/Lin and Carroll - 2006 - Semiparametric Estimation in General Repeated Meas.pdf:application/pdf} }, @article{chatterjee_parametric_2008, title = {Parametric Bootstrap Approximation to the Distribution of {EBLUP} and Related Prediction Intervals in Linear Mixed Models}, volume = {36}, issn = {0090-5364}, url = {http://www.jstor.org/stable/25464665}, abstract = {Empirical best linear unbiased prediction {(EBLUP)} method uses a linear mixed model in combining information from different sources of information. This method is particularly useful in small area problems. The variability of an {EBLUP} is traditionally measured by the mean squared prediction error {(MSPE)}, and interval estimates are generally constructed using estimates of the {MSPE.} Such methods have shortcomings like under-coverage or over-coverage, excessive length and lack of interpretability. We propose a parametric bootstrap approach to estimate the entire distribution of a suitably centered and scaled {EBLUP.} The bootstrap histogram is highly accurate, and differs from the true {EBLUP} distribution by only {$O(d^{3}n^{-3/2})$}, where d is the number of parameters and n the number of observations. This result is used to obtain highly accurate prediction intervals. 
Simulation results demonstrate the superiority of this method over existing techniques of constructing prediction intervals in linear mixed models.}, number = {3}, journal = {The Annals of Statistics}, author = {Chatterjee, Snigdhansu and Lahiri, Partha and Li, Huilin}, month = jun, year = {2008}, note = {{ArticleType:} research-article / Full publication date: Jun., 2008 / Copyright © 2008 Institute of Mathematical Statistics}, pages = {1221--1245}, file = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/C2KA2PCX/Chatterjee et al. - 2008 - Parametric Bootstrap Approximation to the Distribu.pdf:application/pdf} }, @article{qiu_simplex_2008, title = {Simplex Mixed-Effects Models for Longitudinal Proportional Data}, volume = {35}, issn = {0303-6898}, url = {http://www.jstor.org/stable/41000290}, abstract = {Continuous proportional outcomes are collected from many practical studies, where responses are confined within the unit interval (0,1). Utilizing Barndorff-Nielsen and Jorgensen's simplex distribution, we propose a new type of generalized linear mixed-effects model for longitudinal proportional data, where the expected value of proportion is directly modelled through a logit function of fixed and random effects. We establish statistical inference along the lines of Breslow and Clayton's penalized quasi-likelihood {(PQL)} and restricted maximum likelihood {(REML)} in the proposed model. We derive the {PQL/REML} using the high-order multivariate Laplace approximation, which gives satisfactory estimation of the model parameters. The proposed model and inference are illustrated by simulation studies and a data example. The simulation studies conclude that the fourth order approximate {PQL/REML} performs satisfactorily. 
The data example shows that Aitchison's technique of the normal linear mixed model for logit-transformed proportional outcomes is not robust against outliers.}, number = {4}, journal = {Scandinavian Journal of Statistics}, author = {Qiu, Zhenguo and Song, Peter X.-K. and Tan, Ming}, month = dec, year = {2008}, pages = {577--596} }, @article{muller_semiparametric_2007, title = {Semiparametric Bayesian Inference for Multilevel Repeated Measurement Data}, volume = {63}, issn = {0006-{341X}}, url = {http://www.jstor.org/stable/4541324}, abstract = {We discuss inference for data with repeated measurements at multiple levels. The motivating example is data with blood counts from cancer patients undergoing multiple cycles of chemotherapy, with days nested within cycles. Some inference questions relate to repeated measurements over days within cycle, while other questions are concerned with the dependence across cycles. When the desired inference relates to both levels of repetition, it becomes important to reflect the data structure in the model. We develop a semiparametric Bayesian modeling approach, restricting attention to two levels of repeated measurements. For the top-level longitudinal sampling model we use random effects to introduce the desired dependence across repeated measurements. We use a nonparametric prior for the random effects distribution. Inference about dependence across second-level repetition is implemented by the clustering implied in the nonparametric random effects model. Practical use of the model requires that the posterior distribution on the latent random effects be reasonably precise. /// Nous discutons de l'inférence pour des données de mesures répétées à plusieurs niveaux. L'exemple qui motive ce travail consiste en des données de comptage sanguin pour des patients subissant une chimiothérapie comportant plusieurs cycles, avec les jours emboîtés dans les cycles. 
Certaines questions d'inférence sont reliées aux mesures répétées à l'intérieur d'un cycle tandis que d'autres concernent la dépendance entre cycles. Si l'inférence désirée se rapporte à tous les niveaux de répétition, il devient important de refléter la structure des données dans le modèle. Nous développons une approche bayésienne semi-paramétrique en restreignant l'attention aux deux niveaux de mesures répétées. Pour le niveau le plus haut de l'échantillonnage longitudinal nous utilisons des effets aléatoires pour introduire la dépendance désirée entre les mesures répétées. Nous utilisons un a priori non paramétrique pour la distribution des effets aléatoires. L'inférence sur le second niveau de répétitions est rendue possible par l'effet de groupes impliqué dans le modèle à effets aléatoires non paramétriques. L'utilisation pratique du modèle requiert que la distribution a posteriori sur les effets aléatoires latents soit raisonnablement précise.}, number = {1}, journal = {Biometrics}, author = {Müller, Peter and Quintana, Fernando A. and Rosner, Gary L.}, month = mar, year = {2007}, pages = {280--289} }

% Layout used below: one field per line, in the order author, title,
% venue/publisher data, volume/number/pages, date, identifiers, annotations,
% abstract, file. Citation keys are left exactly as exported so that existing
% citations keep resolving.

@article{stubbendick_maximum_2003,
  author     = {Stubbendick, Amy L. and Ibrahim, Joseph G.},
  title      = {Maximum Likelihood Methods for Nonignorable Missing Responses and Covariates in Random Effects Models},
  journal    = {Biometrics},
  volume     = {59},
  number     = {4},
  pages      = {1140--1150},
  month      = dec,
  year       = {2003},
  issn       = {0006-341X},
  url        = {http://www.jstor.org/stable/3695356},
  abstract   = {This article analyzes quality of life {(QOL)} data from an Eastern Cooperative Oncology Group {(ECOG)} melanoma trial that compared treatment with ganglioside vaccination to treatment with high-dose interferon. The analysis of this data set is challenging due to several difficulties, namely, nonignorable missing longitudinal responses and baseline covariates. Hence, we propose a selection model for estimating parameters in the normal random effects model with nonignorable missing responses and covariates. Parameters are estimated via maximum likelihood using the Gibbs sampler and a Monte Carlo expectation maximization {(EM)} algorithm. Standard errors are calculated using the bootstrap. The method allows for nonmonotone patterns of missing data in both the response variable and the covariates. We model the missing data mechanism and the missing covariate distribution via a sequence of one-dimensional conditional distributions, allowing the missing covariates to be either categorical or continuous, as well as time-varying. We apply the proposed approach to the {ECOG} quality-of-life data and conduct a small simulation study evaluating the performance of the maximum likelihood estimates. Our results indicate that a patient treated with the vaccine has a higher {QOL} score on average at a given time point than a patient treated with high-dose interferon.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/79VWA33P/Stubbendick and Ibrahim - 2003 - Maximum Likelihood Methods for Nonignorable Missin.pdf:application/pdf},
}

% The JSTOR boilerplate is kept in the non-printing annote field so it does
% not appear in the typeset bibliography; month is taken from that text.
@article{ibrahim_incomplete_1990,
  author     = {Ibrahim, Joseph G.},
  title      = {Incomplete Data in Generalized Linear Models},
  journal    = {Journal of the American Statistical Association},
  volume     = {85},
  number     = {411},
  pages      = {765--769},
  month      = sep,
  year       = {1990},
  issn       = {0162-1459},
  doi        = {10.2307/2290013},
  url        = {http://www.jstor.org/stable/2290013},
  annote     = {ArticleType: research-article / Full publication date: Sep., 1990 / Copyright © 1990 American Statistical Association},
  abstract   = {This article examines incomplete data for the class of generalized linear models, in which incompleteness is due to partially missing covariates on some observations. Under the assumption that the missing data are missing at random, it is shown that the E step of the {EM} algorithm for any generalized linear model can be expressed as a weighted complete data log-likelihood when the unobserved covariates are assumed to come from a discrete distribution with finite range. Expressing the E step in this manner allows for a straightforward maximization in the M step, thus leading to maximum likelihood estimates {(MLE's)} for the parameters. Asymptotic variances of the {MLE's} are also derived, and results are illustrated with two examples.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/B6TEMH5N/Ibrahim - 1990 - Incomplete Data in Generalized Linear Models.pdf:application/pdf},
}

@book{verbeke_linear_2009,
  author     = {Verbeke, Geert and Molenberghs, Geert},
  title      = {Linear Mixed Models for Longitudinal Data},
  publisher  = {Springer},
  month      = apr,
  year       = {2009},
  isbn       = {9781441902993},
  language   = {en},
  keywords   = {Mathematics / Probability \& Statistics / General},
  abstract   = {This book provides a comprehensive treatment of linear mixed models for continuous longitudinal data. Next to model formulation, this edition puts major emphasis on exploratory data analysis for all aspects of the model, such as the marginal model, subject-specific profiles, and residual covariance structure. Further, model diagnostics and missing data receive extensive treatment. Sensitivity analysis for incomplete data is given a prominent place. Several variations to the conventional linear mixed model are discussed (a heterogeneity model, conditional linear mixed models). This book will be of interest to applied statisticians and biomedical researchers in industry, public health organizations, contract research organizations, and academia. The book is explanatory rather than mathematically rigorous. Most analyses were done with the {MIXED} procedure of the {SAS} software package, and many of its features are clearly elucidated. However, some other commercially available packages are discussed as well. Great care has been taken in presenting the data analyses in a software-independent fashion. Geert Verbeke is Assistant Professor at the Biostatistical Centre of the Katholieke Universiteit Leuven in Belgium. He received the {B.S.} degree in mathematics (1989) from the Katholieke Universiteit Leuven, the {M.S.} in biostatistics (1992) from the Limburgs Universitair Centrum, and earned a {Ph.D.} in biostatistics (1995) from the Katholieke Universiteit Leuven. Dr. Verbeke wrote his dissertation, as well as a number of methodological articles, on various aspects of linear mixed models for longitudinal data analysis. He has held visiting positions at the Gerontology Research Center and the Johns Hopkins University. Geert Molenberghs is Assistant Professor of Biostatistics at the Limburgs Universitair Centrum in Belgium. He received the {B.S.} degree in mathematics (1988) and a {Ph.D.} in biostatistics (1993) from the Universiteit Antwerpen. Dr. Molenberghs published methodological work on the analysis of non-response in clinical and epidemiological studies. He serves as an associate editor for Biometrics, Applied Statistics, and Biostatistics, and is an officer of the Belgian Statistical Society. He has held visiting positions at the Harvard School of Public Health.},
}

% The key keeps the exporter's ???? year placeholder so existing citations
% still resolve. publisher and year are required for a book entry and were
% missing; the DOI now lives in the doi field instead of note.
% NOTE(review): confirm year 2010 against the Springer record.
@book{suess_introduction_????,
  author     = {Suess, Eric A. and Trumbo, Bruce E.},
  title      = {Introduction to Probability Simulation and Gibbs Sampling with R},
  series     = {Use R!},
  publisher  = {Springer},
  year       = {2010},
  doi        = {10.1007/978-0-387-68765-0},
  url        = {http://www.springerlink.com/content/u07217/?MUD=MP},
}

@article{bauer_estimating_2003,
  author     = {Bauer, Daniel J.},
  title      = {Estimating Multilevel Linear Models as Structural Equation Models},
  journal    = {Journal of Educational and Behavioral Statistics},
  volume     = {28},
  number     = {2},
  pages      = {135--167},
  month      = jul,
  year       = {2003},
  issn       = {1076-9986},
  url        = {http://www.jstor.org/stable/3701259},
  abstract   = {Multilevel linear models {(MLMs)} provide a powerful framework for analyzing data collected at nested or non-nested levels, such as students within classrooms. The current article draws on recent analytical and software advances to demonstrate that a broad class of {MLMs} may be estimated as structural equation models {(SEMs)}. Moreover, within the {SEM} approach it is possible to include measurement models for predictors or outcomes, and to estimate the mediational pathways among predictors explicitly, tasks which are currently difficult with the conventional approach to multilevel modeling. The equivalency of the {SEM} approach with conventional methods for estimating {MLMs} is illustrated using empirical examples, including an example involving both multiple indicator latent factors for the outcomes and a causal chain for the predictors. The limitations of this approach for estimating {MLMs} are discussed and alternative approaches are considered.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/JRTDUH4T/Bauer - 2003 - Estimating Multilevel Linear Models as Structural .pdf:application/pdf},
}

@article{kaplan_model-based_1997,
  author     = {Kaplan, David and Elliott, Pamela R.},
  title      = {A Model-Based Approach to Validating Education Indicators Using Multilevel Structural Equation Modeling},
  journal    = {Journal of Educational and Behavioral Statistics},
  volume     = {22},
  number     = {3},
  pages      = {323--347},
  month      = oct,
  year       = {1997},
  issn       = {1076-9986},
  doi        = {10.2307/1165288},
  url        = {http://www.jstor.org/stable/1165288},
  abstract   = {This article considers an approach to validating the selection of education indicators by incorporating them into a multilevel structural model and using the estimates from that model to engage in policy-relevant simulations. Multilevel structural equation modeling was applied to a subsample of the first follow-up of the National Education Longitudinal Study of 1988 (National Center for Education Statistics, 1988) to demonstrate the potential of this approach. Focus of attention was on science education indicators. A within-school model of science achievement was linked to a between-school model of the academic press of the school. Separate estimation of these models revealed adequate fit to the data after minor modifications. The multilevel model also showed adequate fit to the data. Utilizing the reduced form of the full multilevel model, predictive validity of the model was studied by gauging movements in various outcome indicators as a function of changes in policy-relevant input indicators. The article closes with a discussion of the limitations of the proposed modeling approach, the potential for future model development, and the implications of this approach for quantitative modeling within the domain of education policy.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/N8H9HAGV/Kaplan and Elliott - 1997 - A Model-Based Approach to Validating Education Ind.pdf:application/pdf},
}

% Second author's surname is the compound Santos Silva; written in
% Last, First form so it is not split into first name and surname.
@article{windmeijer_endogeneity_1997,
  author     = {Windmeijer, F. A. G. and Santos Silva, J. M. C.},
  title      = {Endogeneity in Count Data Models: An Application to Demand for Health Care},
  shorttitle = {Endogeneity in Count Data Models},
  journal    = {Journal of Applied Econometrics},
  volume     = {12},
  number     = {3},
  pages      = {281--294},
  month      = may,
  year       = {1997},
  issn       = {0883-7252},
  url        = {http://www.jstor.org/stable/2285250},
  abstract   = {The generalized method of moments {(GMM)} estimation technique is discussed for count data models with endogenous regressors. Count data models can be specified with additive or multiplicative errors. It is shown that, in general, a set of instruments is not orthogonal to both error types. Simultaneous equations with a dependent count variable often do not have a reduced form which is a simple function of the instruments. However, a simultaneous model with a count and a binary variable can only be logically consistent when the system is triangular. The {GMM} estimator is used in the estimation of a model explaining the number of visits to doctors, with as a possible endogenous regressor a self-reported binary health index. Further, a model is estimated, in stages, that includes latent health instead of the binary health index.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/3KTVFAGZ/Windmeijer and Silva - 1997 - Endogeneity in Count Data Models An Application t.pdf:application/pdf},
}

@article{marsh_academic_2005,
  author     = {Marsh, Herbert W. and Trautwein, Ulrich and Lüdtke, Oliver and Köller, Olaf and Baumert, Jürgen},
  title      = {Academic Self-Concept, Interest, Grades, and Standardized Test Scores: Reciprocal Effects Models of Causal Ordering},
  shorttitle = {Academic Self-Concept, Interest, Grades, and Standardized Test Scores},
  journal    = {Child Development},
  volume     = {76},
  number     = {2},
  pages      = {397--416},
  month      = mar,
  year       = {2005},
  issn       = {0009-3920},
  url        = {http://www.jstor.org/stable/3696511},
  abstract   = {Reciprocal effects models of longitudinal data show that academic self-concept is both a cause and an effect of achievement. In this study this model was extended to juxtapose self-concept with academic interest. Based on longitudinal data from 2 nationally representative samples of German 7th-grade students (Study 1: N = 5,649, M age = 13.4; Study 2: N = 2,264, M age = 13.7 years), prior self-concept significantly affected subsequent math interest, school grades, and standardized test scores, whereas prior math interest had only a small effect on subsequent math self-concept. Despite stereotypic gender differences in means, linkages relating these constructs were invariant over gender. These results demonstrate the positive effects of academic self-concept on a variety of academic outcomes and integrate self-concept with the developmental motivation literature.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/XQIJ3SH2/Marsh et al. - 2005 - Academic Self-Concept, Interest, Grades, and Stand.pdf:application/pdf},
}

@article{gordon_k._smyth_efficient_2002,
  author     = {Smyth, Gordon K.},
  title      = {An Efficient Algorithm for {REML} in Heteroscedastic Regression},
  journal    = {Journal of Computational and Graphical Statistics},
  volume     = {11},
  number     = {4},
  pages      = {836--847},
  month      = dec,
  year       = {2002},
  issn       = {1061-8600},
  url        = {http://www.jstor.org/stable/1391164},
  abstract   = {This article considers {REML} (residual or restricted maximum likelihood) estimation for heteroscedastic linear models. An explicit algorithm is given for {REML} scoring which yields the {REML} estimates together with their standard errors and likelihood values. The algorithm includes a Levenberg-Marquardt restricted step modification that ensures that the {REML} likelihood increases at each iteration. This article shows how the complete computation, including the {REML} information matrix, may be carried out in O(n) operations.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/E5JQHWTG/Gordon K. Smyth - 2002 - An Efficient Algorithm for REML in Heteroscedastic.pdf:application/pdf},
}

@article{gordon_k._smyth_conditional_1996,
  author     = {Smyth, Gordon K. and Verbyla, Arunas P.},
  title      = {A Conditional Likelihood Approach to Residual Maximum Likelihood Estimation in Generalized Linear Models},
  journal    = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume     = {58},
  number     = {3},
  pages      = {565--572},
  month      = jan,
  year       = {1996},
  issn       = {0035-9246},
  url        = {http://www.jstor.org/stable/2345894},
  abstract   = {Residual maximum likelihood {(REML)} estimation is often preferred to maximum likelihood estimation as a method of estimating covariance parameters in linear models because it takes account of the loss of degrees of freedom in estimating the mean and produces unbiased estimating equations for the variance parameters. In this paper it is shown that {REML} has an exact conditional likelihood interpretation, where the conditioning is on an appropriate sufficient statistic to remove dependence on the nuisance parameters. This interpretation clarifies the motivation for {REML} and generalizes directly to non-normal models in which there is a low dimensional sufficient statistic for the fitted values. The conditional likelihood is shown to be well defined and to satisfy the properties of a likelihood function, even though this is not generally true when conditioning on statistics which depend on parameters of interest. Using the conditional likelihood representation, the concept of {REML} is extended to generalized linear models with varying dispersion and canonical link. Explicit calculation of the conditional likelihood is given for the one-way lay-out. A saddlepoint approximation for the conditional likelihood is also derived.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QF87QDHK/Gordon K. Smyth and Verbyla - 1996 - A Conditional Likelihood Approach to Residual Maxi.pdf:application/pdf},
}

% The particle de belongs to the surname, so it goes before Leeuw in
% von Last, First form rather than after the given name.
@article{leeuw_random_1986,
  author     = {de Leeuw, Jan and Kreft, Ita},
  title      = {Random Coefficient Models for Multilevel Analysis},
  journal    = {Journal of Educational Statistics},
  volume     = {11},
  number     = {1},
  pages      = {57--85},
  month      = apr,
  year       = {1986},
  issn       = {0362-9791},
  doi        = {10.2307/1164848},
  url        = {http://www.jstor.org/stable/1164848},
  abstract   = {We propose a possible statistical model for both contextual analysis and slopes as outcomes analysis. These techniques have been used in multilevel analysis for quite some time, but a precise specification of the regression models has not been given before. We formalize them by proposing a random coefficient regression model, and we investigate its statistical properties in some detail. Various estimation methods are reviewed and applied to a Dutch school-career example.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/K7KHBE59/Leeuw and Kreft - 1986 - Random Coefficient Models for Multilevel Analysis.pdf:application/pdf},
}

@article{goldstein_nonlinear_1991,
  author     = {Goldstein, Harvey},
  title      = {Nonlinear Multilevel Models, with an Application to Discrete Response Data},
  journal    = {Biometrika},
  volume     = {78},
  number     = {1},
  pages      = {45--51},
  month      = mar,
  year       = {1991},
  issn       = {0006-3444},
  doi        = {10.2307/2336894},
  url        = {http://www.jstor.org/stable/2336894},
  abstract   = {A procedure is proposed for the analysis of multilevel nonlinear models using a linearization. The case of log linear models for discrete response data is studied in detail.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/QTPKQTQ6/Goldstein - 1991 - Nonlinear Multilevel Models, with an Application t.pdf:application/pdf},
}

% Author given name spelled out to match goldstein_nonlinear_1991, so both
% papers are grouped under one author.
@article{goldstein_multilevel_1986,
  author     = {Goldstein, Harvey},
  title      = {Multilevel Mixed Linear Model Analysis Using Iterative Generalized Least Squares},
  journal    = {Biometrika},
  volume     = {73},
  number     = {1},
  pages      = {43--56},
  month      = apr,
  year       = {1986},
  issn       = {0006-3444},
  doi        = {10.2307/2336270},
  url        = {http://www.jstor.org/stable/2336270},
  abstract   = {Models for the analysis of hierarchically structured data are discussed. An iterative generalized least squares estimation procedure is given and shown to be equivalent to maximum likelihood in the normal case. There is a discussion of applications to complex surveys, longitudinal data, and estimation in multivariate models with missing responses. An example is given using educational data.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/KW8AKC4B/Goldstein - 1986 - Multilevel Mixed Linear Model Analysis Using Itera.pdf:application/pdf},
}

@article{harville_bayesian_1974,
  author     = {Harville, David A.},
  title      = {Bayesian Inference for Variance Components Using Only Error Contrasts},
  journal    = {Biometrika},
  volume     = {61},
  number     = {2},
  pages      = {383--385},
  year       = {1974},
  issn       = {0006-3444},
  doi        = {10.2307/2334370},
  url        = {http://www.jstor.org/stable/2334370},
  abstract   = {Patterson \& Thompson (1971) proposed estimating the variance components of a mixed analysis of variance model by maximizing the likelihood of a set of error contrasts. In the present paper, a convenient representation is obtained for that likelihood. It is shown that, from a Bayesian viewpoint, using only error contrasts to make inferences on variance components is equivalent to ignoring any prior information on the fixed effects and using all the data.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/WKSHDXRT/Harville - 1974 - Bayesian Inference for Variance Components Using O.pdf:application/pdf},
}

@article{jennrich_unbalanced_1986,
  author     = {Jennrich, Robert I. and Schluchter, Mark D.},
  title      = {Unbalanced Repeated-Measures Models with Structured Covariance Matrices},
  journal    = {Biometrics},
  volume     = {42},
  number     = {4},
  pages      = {805--820},
  month      = dec,
  year       = {1986},
  issn       = {0006-341X},
  doi        = {10.2307/2530695},
  url        = {http://www.jstor.org/stable/2530695},
  abstract   = {The question of how to analyze unbalanced or incomplete repeated-measures data is a common problem facing analysts. We address this problem through maximum likelihood analysis using a general linear model for expected responses and arbitrary structural models for the within-subject covariances. Models that can be fit include standard univariate and multivariate models with incomplete data, random-effects models, and models with time-series and factor-analytic error structures. We describe Newton-Raphson and Fisher scoring algorithms for computing maximum likelihood estimates, and generalized {EM} algorithms for computing restricted and unrestricted maximum likelihood estimates. An example fitting several models to a set of growth data is included.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/H8UBKZ6T/Jennrich and Schluchter - 1986 - Unbalanced Repeated-Measures Models with Structure.pdf:application/pdf},
}

@article{harville_maximum_1977,
  author     = {Harville, David A.},
  title      = {Maximum Likelihood Approaches to Variance Component Estimation and to Related Problems},
  journal    = {Journal of the American Statistical Association},
  volume     = {72},
  number     = {358},
  pages      = {320--338},
  month      = jun,
  year       = {1977},
  issn       = {0162-1459},
  doi        = {10.2307/2286796},
  url        = {http://www.jstor.org/stable/2286796},
  abstract   = {Recent developments promise to increase greatly the popularity of maximum likelihood {(ML)} as a technique for estimating variance components. Patterson and Thompson (1971) proposed a restricted maximum likelihood {(REML)} approach which takes into account the loss in degrees of freedom resulting from estimating fixed effects. Miller (1973) developed a satisfactory asymptotic theory for {ML} estimators of variance components. There are many iterative algorithms that can be considered for computing the {ML} or {REML} estimates. The computations on each iteration of these algorithms are those associated with computing estimates of fixed and random effects for given values of the variance components.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/6ET6TEMR/Harville - 1977 - Maximum Likelihood Approaches to Variance Componen.pdf:application/pdf},
}

% NOTE(review): month = jan looks like an export default, since the JSTOR
% text kept in annote gives only the year 1997 - confirm before relying on it.
@article{welham_likelihood_1997,
  author     = {Welham, S. J. and Thompson, R.},
  title      = {Likelihood Ratio Tests for Fixed Model Terms Using Residual Maximum Likelihood},
  journal    = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume     = {59},
  number     = {3},
  pages      = {701--714},
  month      = jan,
  year       = {1997},
  issn       = {0035-9246},
  url        = {http://www.jstor.org/stable/2346019},
  annote     = {ArticleType: research-article / Full publication date: 1997 / Copyright © 1997 Royal Statistical Society},
  abstract   = {Likelihood ratio tests for fixed model terms are proposed for the analysis of linear mixed models when using residual maximum likelihood estimation. Bartlett-type adjustments, using an approximate decomposition of the data, are developed for the test statistics. A simulation study is used to compare properties of the test statistics proposed, with or without adjustment, with a Wald test. A proposed test statistic constructed by dropping fixed terms from the full fixed model is shown to give a better approximation to the asymptotic $\chi^{2}$-distribution than the Wald test for small data sets. Bartlett adjustment is shown to improve the $\chi^{2}$-approximation for the proposed tests substantially.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/Q2K27C3I/Welham and Thompson - 1997 - Likelihood Ratio Tests for Fixed Model Terms Using.pdf:application/pdf},
}

@article{patterson_recovery_1971,
  author     = {Patterson, H. D. and Thompson, R.},
  title      = {Recovery of Inter-Block Information when Block Sizes are Unequal},
  journal    = {Biometrika},
  volume     = {58},
  number     = {3},
  pages      = {545--554},
  month      = dec,
  year       = {1971},
  issn       = {0006-3444},
  doi        = {10.2307/2334389},
  url        = {http://www.jstor.org/stable/2334389},
  abstract   = {A method is proposed for estimating intra-block and inter-block weights in the analysis of incomplete block designs with block sizes not necessarily equal. The method consists of maximizing the likelihood, not of all the data, but of a set of selected error contrasts. When block sizes are equal results are identical with those obtained by the method of Nelder (1968) for generally balanced designs. Although mainly concerned with incomplete block designs the paper also gives in outline an extension of the modified maximum likelihood procedure to designs with a more complicated block structure.},
  file       = {JSTOR Full Text PDF:/home/pauljohn/Documents/Zotero/storage/J9NFICQ4/Patterson and Thompson - 1971 - Recovery of Inter-Block Information when Block Siz.pdf:application/pdf},
}

@book{degroot_optimal_1969,
  author     = {{DeGroot}, Morris H.},
  title      = {Optimal Statistical Decisions},
  publisher  = {{McGraw-Hill}},
  address    = {New York},
  year       = {1969},
  keywords   = {Statistical decision},
}

% number holds the bare series number; the bibliography style supplies any
% label such as no. itself.
@book{cameron_regression_1998,
  author     = {Cameron, Adrian Colin and Trivedi, P. K.},
  title      = {Regression Analysis of Count Data},
  series     = {Econometric Society Monographs},
  number     = {30},
  publisher  = {Cambridge University Press},
  address    = {Cambridge; New York},
  year       = {1998},
  isbn       = {0521632013},
  keywords   = {Econometrics, Regression analysis},
}

@book{long_regression_1997,
  author     = {Long, J. Scott},
  title      = {Regression Models for Categorical and Limited Dependent Variables},
  series     = {Advanced Quantitative Techniques in the Social Sciences},
  number     = {7},
  publisher  = {Sage Publications},
  address    = {Thousand Oaks},
  year       = {1997},
  isbn       = {0803973748},
  keywords   = {Regression analysis},
}