% Encoding: UTF-8
@Manual{RCore,
 title = {{R}: A Language and Environment for Statistical Computing},
 author = {{R Core Team}},
 organization = {R Foundation for Statistical Computing},
 address = {Vienna, Austria},
 year = {2018},
 url = {https://cran.r-project.org/doc/manuals/r-release/fullrefman.pdf},
}
@book{venablesripley2000,
address = {New York},
series = {Statistics and computing},
title = {S {Programming}},
isbn = {0-387-98966-8},
publisher = {Springer},
author = {Venables, William N. and Ripley, Brian D.},
year = {2000},
keywords = {S (Computer program language)}
}
@book{chambers_programming_1998,
address = {New York},
title = {Programming with data: a guide to the {S} language},
isbn = {0-387-98503-4},
shorttitle = {Programming with data},
publisher = {Springer},
author = {Chambers, John M.},
year = {1998},
keywords = {Data processing, S (Computer program language), Statistics}
}
@book{chambers_software_2008,
address = {New York ; London},
series = {Statistics and computing},
title = {Software for data analysis: programming with {R}},
isbn = {978-0-387-75935-7},
shorttitle = {Software for data analysis},
publisher = {Springer},
author = {Chambers, John M.},
year = {2008},
keywords = {Data processing, Numerical analysis, R (Computer program language)}
}
@book{chambers_computational_1977,
address = {New York},
series = {Wiley series in probability and mathematical statistics},
title = {Computational methods for data analysis},
isbn = {0-471-02772-3},
publisher = {Wiley},
author = {Chambers, John M.},
year = {1977},
keywords = {Data processing, Mathematical statistics, Numerical analysis}
}
@book{gentleman_r_2008,
address = {Boca Raton},
edition = {1st},
title = {R {Programming} for {Bioinformatics}},
isbn = {978-1-4200-6367-7},
language = {English},
publisher = {Chapman and Hall/CRC},
author = {Gentleman, Robert},
month = jul,
year = {2008}
}
@book{levithan_regular_nodate,
 title = {Regular {Expressions} {Cookbook}},
 edition = {2nd},
 isbn = {978-1-4493-1943-4},
 url = {http://shop.oreilly.com/product/0636920023630.do},
 abstract = {Take the guesswork out of using regular expressions. With more than 140 practical recipes, this cookbook provides everything you need to solve a wide range of real-world problems. Novices will learn basic skills and tools, and programmers and...},
 language = {en},
 urldate = {2018-05-11},
 publisher = {O'Reilly Media},
 address = {Sebastopol, CA},
 author = {Goyvaerts, Jan and Levithan, Steven},
 year = {2012}
}
@book{fox_r_2011,
address = {Thousand Oaks CA},
edition = {2nd},
title = {An {R} {Companion} to {Applied} {Regression}},
url = {http://socserv.socsci.mcmaster.ca/jfox/Books/Companion},
publisher = {Sage},
author = {Fox, John and Weisberg, Sanford},
year = {2011}
}
@book{becker_extending_1985,
 address = {Monterey, Calif},
 series = {Wadsworth statistics/probability series},
 title = {Extending the {S} system},
 isbn = {0-534-05016-6},
 publisher = {Wadsworth Advanced Books and Software},
 author = {Becker, Richard A. and Chambers, John M.},
 year = {1985},
 keywords = {Data processing, Interactive computer systems, Mathematical statistics, S (Computer system), Statistics}
}
@Book{diggle_analysis_2013,
Author = {Diggle, Peter and Heagerty, Patrick and Liang,
Kung-Yee and Zeger, Scott},
Title = {Analysis of {Longitudinal} {Data}},
Publisher = {Oxford University Press},
Address = {Oxford},
Edition = {2nd},
isbn = {978-0-19-967675-0},
language = {English},
month = may,
year = 2013
}
@Book{snijders_multilevel_2011,
 Author = {Snijders, Tom A. B. and Bosker, Roel},
 Title = {Multilevel Analysis: An Introduction to Basic and
 Advanced Multilevel Modeling},
 Publisher = {Sage Publications},
 Address = {Los Angeles, CA},
 Edition = {2nd},
 isbn = {978-1-84920-201-5},
 pagetotal = {368},
 shorttitle = {Multilevel Analysis},
 year = 2011
}
@Book{long_regression_1997,
Author = {Long, J. Scott},
Title = {Regression {Models} for {Categorical} and {Limited}
{Dependent} {Variables}},
Publisher = {Sage Publications},
Number = {7},
Series = {Advanced quantitative techniques in the social
sciences},
Address = {Thousand Oaks},
isbn = {0-8039-7374-8},
keywords = {Regression analysis},
year = 1997
}
@Book{singer_willett_2003,
Author = {Singer, Judith D. and Willett, John B.},
Title = {Applied longitudinal data analysis: modeling change
and event occurrence},
Publisher = {Oxford University Press},
Address = {Oxford ; New York},
isbn = {978-0-19-515296-8},
keywords = {Longitudinal method, RESEARCH, Social sciences},
shorttitle = {Applied longitudinal data analysis},
year = 2003
}
@Book{long_longitudinal_2011,
Author = {Long, Jeffrey D.},
Title = {Longitudinal {Data} {Analysis} for the {Behavioral}
{Sciences} {Using} {R}},
Publisher = {SAGE Publications, Inc},
Address = {Thousand Oaks, Calif},
abstract = {This book is unique in its focus on showing students
in the behavioral sciences how to analyze longitudinal
data using R software. The book focuses on application,
making it practical and accessible to students in
psychology, education, and related fields, who have a
basic foundation in statistics. It provides explicit
instructions in R computer programming throughout the
book, showing students exactly how a specific analysis
is carried out and how output is interpreted.},
isbn = {978-1-4129-8268-9},
language = {English},
month = oct,
year = 2011
}
@Book{rabe-hesketh_multilevel_2012,
Author = {{Rabe-Hesketh}, Sophia and Skrondal, Anders},
Title = {Multilevel and longitudinal modeling using Stata},
Publisher = {Stata Press Publication},
Edition = {3rd},
isbn = {978-1-59718-108-2 978-1-59718-103-7 978-1-59718-104-4},
keywords = {Data processing, Latent structure analysis, latent
variables, Linear models (Statistics), Mathematical
statistics, Multilevel models (Statistics), Stata},
location = {College Station, Tex},
pagetotal = {2},
year = 2012
}
@Book{raudenbush_hierarchical_2002,
Author = {Raudenbush, Stephen W. and Bryk, Anthony S.},
Title = {Hierarchical linear models: applications and data
analysis methods},
Publisher = {Sage},
isbn = {978-0-7619-1904-9},
location = {Thousand Oaks, {CA}},
pagetotal = {520},
shorttitle = {Hierarchical linear models},
year = 2002
}
@Book{mccullagh_nelder_1989,
Author = {McCullagh, Peter and Nelder, John A.},
Title = {Generalized {Linear} {Models}},
Publisher = {Chapman and Hall/CRC},
Address = {Boca Raton},
Edition = {2nd},
isbn = {978-0-412-31760-6},
language = {English},
month = aug,
year = 1989
}
@Book{greene_econometric_2008,
Author = {Greene, William H.},
Title = {Econometric analysis},
Publisher = {Prentice Hall},
Edition = {6th},
isbn = {978-0-13-600383-0},
keywords = {Econometrics},
location = {Upper Saddle River, N.J},
pagetotal = {1178},
year = 2008
}
@Book{gelman_hill_2006,
 Author = {Gelman, Andrew and Hill, Jennifer},
 Title = {Data {Analysis} {Using} {Regression} and
 {Multilevel}/{Hierarchical} {Models}},
 Publisher = {Cambridge University Press},
 Address = {Cambridge; New York},
 Edition = {1st},
 isbn = {978-0-521-68689-1},
 language = {English},
 year = 2006
}
@Article{bates_linear_2004,
Author = {Bates, Douglas M. and DebRoy, Saikat},
Title = {Linear mixed models and penalized least squares},
Journal = {Journal of Multivariate Analysis},
Volume = {91},
Number = {1},
Pages = {1--17},
doi = {10.1016/j.jmva.2004.04.013},
issn = {0047-259X},
keywords = {ECME algorithm, EM algorithm, Gradient, Hessian,
Maximum likelihood, Multilevel models, Profile
likelihood, REML},
month = oct,
series = {Special {Issue} on {Semiparametric} and
{Nonparametric} {Mixed} {Models}},
url = {http://www.sciencedirect.com/science/article/pii/S0047259X04000867},
urldate = {2016-02-06},
year = 2004
}
@Book{hastie_elements_2001,
 Author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome H.},
 Title = {The elements of statistical learning: data mining,
 inference, and prediction},
 Publisher = {Springer},
 Series = {Springer series in statistics},
 Address = {New York},
 isbn = {0-387-95284-5},
 keywords = {Supervised learning (Machine learning)},
 shorttitle = {The elements of statistical learning},
 year = 2001
}
@Article{fielding_2004,
 Author = {Fielding, Antony},
 Title = {The Role of the {Hausman} Test and Whether Higher
 Level Effects Should Be Treated as Random or Fixed},
 Journal = {Multilevel Modeling Newsletter},
 Volume = {16},
 Number = {2},
 Pages = {3--9},
 year = 2004
}
@Book{hsiao_analysis_2014,
Author = {Hsiao, Cheng},
Title = {Analysis of panel data},
Publisher = {Cambridge University Press},
Number = {54},
Series = {Econometric {Society} monographs},
Address = {New York, NY},
Edition = {3rd},
Note = {OCLC: ocn872561839},
abstract = {"This book provides a comprehensive, coherent, and
intuitive review of panel data methodologies that are
useful for empirical analysis. Substantially revised
from the second edition, it includes two new chapters
on modeling cross-sectionally dependent data and
dynamic systems of equations. Some of the more
complicated concepts have been further streamlined.
Other new material includes correlated random
coefficient models, pseudo-panels, duration and count
data models, quantile analysis, and alternative
approaches for controlling the impact of unobserved
heterogeneity in nonlinear panel data models"--},
isbn = {978-1-107-03869-1 978-1-107-65763-2},
keywords = {Analysis of Variance, Econometrics, Panel analysis},
year = 2014
}
@Book{hastie_elements_2009,
 Author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome H.},
 Title = {The elements of statistical learning: data mining,
 inference, and prediction},
 Publisher = {Springer},
 Address = {New York},
 Edition = {2nd},
 abstract = {"During the past decade there has been an explosion in
 computation and information technology. With it have
 come vast amounts of data in a variety of fields such
 as medicine, biology, finance, and marketing. The
 challenge of understanding these data has led to the
 development of new tools in the field of statistics,
 and spawned new areas such as data mining, machine
 learning, and bioinformatics. Many of these tools have
 common underpinnings but are often expressed with
 different terminology. This book describes the
 important ideas in these areas in a common conceptual
 framework. While the approach is statistical, the
 emphasis is on concepts rather than mathematics. Many
 examples are given, with a liberal use of color
 graphics."--Jacket.},
 isbn = {978-0-387-84858-7 0-387-84858-4 978-0-387-84857-0
 0-387-84857-6},
 language = {English},
 shorttitle = {The elements of statistical learning},
 year = 2009
}
@Book{lindsey_models_1999,
Author = {Lindsey, J. K.},
Title = {Models for {Repeated} {Measurements}},
Publisher = {Oxford University Press},
Address = {Oxford ; New York},
Edition = {2nd},
abstract = {Models for Repeated Measurements will interest
research statisticians in agriculture, medicine,
economics, and psychology, as well as the many
consulting statisticians who want an up-to-date
expository account of this important topic. This
edition of this successful book has been completely
updated to take into account the many developments in
the area over the last few years. It features three new
chapters on models for continuous non-normal data, on
various design issues specific to repeated
measurements, and on missing data and dropouts.
Exercises have been added at the ends of most chapters,
and the software for carrying out the analyses is now
available to the public. The book begins with a
development of the general context of repeated
measurements. It then describes the three basic types
of response variables--continuous (normal),
categorical, and count data--and develops a practical
framework for creating suitable models and for applying
ideas on multivariate distributions and stochastic
processes. The book then devotes three sections to
examining a large number of concrete examples,
including data tables, to illustrate the models
available. The book also includes an extensive list of
references.},
isbn = {978-0-19-850559-4},
language = {English},
month = sep,
year = 1999
}
@Book{fitzmaurice_applied_2011,
Author = {Fitzmaurice, Garrett M. and Laird, Nan M. and Ware,
James H.},
Title = {Applied longitudinal analysis},
Publisher = {Wiley},
Series = {Wiley series in probability and statistics},
Address = {Hoboken, N.J},
Edition = {2nd},
abstract = {"Since the publication of the first edition, the
authors have solicited feedback from both the
instructors who use the book as a text for their
courses as well as the researchers who use the book as
a resource for their research. Thus, the improved
Second Edition of Applied Longitudinal Analysis
features many additions and revisions based on the
feedback of readers, making it the go-to reference for
applied use in public health, epidemiology, and
pharmaceutical sciences"--},
isbn = {978-0-470-38027-7},
keywords = {Longitudinal method, Medical statistics, Multivariate
analysis, Regression analysis},
year = 2011
}
@Book{mccullagh_nelder_1983,
 Author = {McCullagh, Peter and Nelder, John A.},
 Title = {Generalized {Linear} {Models}},
 Publisher = {Chapman and Hall},
 Number = {37},
 Series = {Monographs on statistics and applied probability},
 Address = {London},
 isbn = {0-412-23850-0},
 keywords = {Linear models (Statistics)},
 year = 1983
}
@Article{king_statistical_1988,
Author = {King, Gary},
Title = {Statistical {Models} for {Political} {Science} {Event}
{Counts}: {Bias} in {Conventional} {Procedures} and
{Evidence} for the {Exponential} {Poisson} {Regression}
{Model}},
Journal = {American Journal of Political Science},
Volume = {32},
Number = {3},
Pages = {838--863},
abstract = {This paper presents analytical, Monte Carlo, and
empirical evidence on models for event count data.
Event counts are dependent variables that measure the
number of times some event occurs. Counts of
international events are probably the most common, but
numerous examples exist in every empirical field of the
discipline. The results of the analysis below strongly
suggest that the way event counts have been analyzed in
hundreds of important political science studies have
produced statistically and substantively unreliable
results. Misspecification, inefficiency, bias,
inconsistency, insufficiency, and other problems result
from the unknowing application of two common methods
that are without theoretical justification or empirical
utility in this type of data. I show that the
exponential Poisson regression (EPR) model provides
analytically, in large samples, and empirically, in
small, finite samples, a far superior model and optimal
estimator. I also demonstrate the advantage of this
methodology in an application to nineteenth-century
party switching in the U.S. Congress. Its use by
political scientists is strongly encouraged.},
doi = {10.2307/2111248},
file = {JSTOR Full Text
PDF:/home/pauljohn/.mozilla/firefox/mx4cgvoq.default-1391919321660/zotero/storage/G5GEHHWM/King
- 1988 - Statistical Models for Political Science Event
Cou.pdf:application/pdf},
issn = {0092-5853},
shorttitle = {Statistical {Models} for {Political} {Science} {Event}
{Counts}},
url = {http://www.jstor.org/stable/2111248},
urldate = {2011-08-18},
year = 1988
}
@Book{wood_generalized_2006,
 Author = {Wood, Simon N.},
 Title = {Generalized {Additive} {Models}: {An} {Introduction}
 with {R}},
 Publisher = {Chapman \& Hall/CRC},
 Address = {Boca Raton, FL},
 isbn = {1-58488-474-6 978-1-58488-474-3},
 language = {English},
 shorttitle = {Generalized additive models},
 year = 2006
}
@InCollection{snijders_berkof_2008,
 Author = {Snijders, Tom A. B. and Berkhof, Johannes},
 Title = {Diagnostic checks for multilevel models},
 BookTitle = {Handbook of {Multilevel} {Analysis}},
 Editor = {de Leeuw, Jan and Meijer, Erik},
 Publisher = {Springer},
 Address = {New York},
 Pages = {141--175},
 year = 2008
}
@Book{hox_handbook_2010,
Editor = {Hox, Joop and Roberts, J. Kyle},
Title = {Handbook of {Advanced} {Multilevel} {Analysis}},
Publisher = {Routledge},
Address = {New York},
Edition = {1st},
abstract = {This new handbook is the definitive resource on
advanced topics related to multilevel analysis. The
editors assembled the top minds in the field to address
the latest applications of multilevel modeling as well
as the specific difficulties and methodological
problems that are becoming more common as more
complicated models are developed. Each chapter features
examples that use actual datasets. These datasets, as
well as the code to run the models, are available on
the book’s website http://www.hlm-online.com . Each
chapter includes an introduction that sets the stage
for the material to come and a conclusion. Divided into
five sections, the first provides a broad introduction
to the field that serves as a framework for
understanding the latter chapters. Part 2 focuses on
multilevel latent variable modeling including item
response theory and mixture modeling. Section 3
addresses models used for longitudinal data including
growth curve and structural equation modeling. Special
estimation problems are examined in section 4 including
the difficulties involved in estimating survival
analysis, Bayesian estimation, bootstrapping, multiple
imputation, and complicated models, including
generalized linear models, optimal design in multilevel
models, and more. The book’s concluding section
focuses on statistical design issues encountered when
doing multilevel modeling including nested designs,
analyzing cross-classified models, and dyadic data
analysis. Intended for methodologists, statisticians,
and researchers in a variety of fields including
psychology, education, and the social and health
sciences, this handbook also serves as an excellent
text for graduate and PhD level courses in multilevel
modeling. A basic knowledge of multilevel modeling is
assumed.},
isbn = {978-1-84169-722-2},
language = {English},
month = jul,
year = 2010
}
@Book{pinheiro_bates_2000,
 Author = {Pinheiro, Jos{\'e} C. and Bates, Douglas M.},
 Title = {Mixed-{Effects} {Models} in {S} and {S}-{PLUS}},
 Publisher = {Springer},
 Series = {Statistics and computing},
 Address = {New York},
 isbn = {0-387-98957-9},
 year = 2000
}
@Book{cameron_regression_2013,
Author = {Cameron, A. Colin and Trivedi, Pravin K.},
Title = {Regression {Analysis} of {Count} {Data}},
Publisher = {Cambridge University Press},
Address = {Cambridge ; New York, NY},
Edition = {2nd},
abstract = {Students in both social and natural sciences often
seek regression methods to explain the frequency of
events, such as visits to a doctor, auto accidents, or
new patents awarded. This book provides the most
comprehensive and up-to-date account of models and
methods to interpret such data. The authors have
conducted research in the field for more than
twenty-five years. In this book, they combine theory
and practice to make sophisticated methods of analysis
accessible to researchers and practitioners working
with widely different types of data and software in
areas such as applied statistics, econometrics,
marketing, operations research, actuarial studies,
demography, biostatistics, and quantitative social
sciences. The book may be used as a reference work on
count models or by students seeking an authoritative
overview. Complementary material in the form of data
sets, template programs, and bibliographic resources
can be accessed on the Internet through the authors'
homepages. This second edition is an expanded and
updated version of the first, with new empirical
examples and more than one hundred new references
added. The new material includes new theoretical
topics, an updated and expanded treatment of
cross-section models, coverage of bootstrap-based and
simulation-based inference, expanded treatment of time
series, multivariate and panel data, expanded treatment
of endogenous regressors, coverage of quantile count
regression, and a new chapter on Bayesian methods.},
isbn = {978-1-107-66727-3},
language = {English},
month = may,
year = 2013
}
@Book{hox_multilevel_2010,
Author = {Hox, Joop J. and Moerbeek, Mirjam and Schoot, Rens van
de},
Title = {Multilevel {Analysis}: {Techniques} and
{Applications}},
Publisher = {Routledge},
Address = {New York},
Edition = {2nd},
abstract = {This practical introduction helps readers apply
multilevel techniques to their research. Noted as an
accessible introduction, the book also includes
advanced extensions, making it useful as both an
introduction and as a reference to students,
researchers, and methodologists. Basic models and
examples are discussed in non-technical terms with an
emphasis on understanding the methodological and
statistical issues involved in using these models. The
estimation and interpretation of multilevel models is
demonstrated using realistic examples from various
disciplines. For example, readers will find data sets
on stress in hospitals, GPA scores, survey responses,
street safety, epilepsy, divorce, and sociometric
scores, to name a few. The data sets are available on
the website in SPSS, HLM, MLwiN, LISREL and/or Mplus
files. Readers are introduced to both the multilevel
regression model and multilevel structural models.
Highlights of the second edition include: Two new
chapters―one on multilevel models for ordinal and
count data (Ch. 7) and another on multilevel survival
analysis (Ch. 8). Thoroughly updated chapters on
multilevel structural equation modeling that reflect
the enormous technical progress of the last few years.
The addition of some simpler examples to help the
novice, whilst the more complex examples that combine
more than one problem have been retained. A new section
on multivariate meta-analysis (Ch. 11). Expanded
discussions of covariance structures across time and
analyzing longitudinal data where no trend is expected.
Expanded chapter on the logistic model for dichotomous
data and proportions with new estimation methods. An
updated website at http://www.joophox.net/ with data
sets for all the text examples and up-to-date screen
shots and PowerPoint slides for instructors. Ideal for
introductory courses on multilevel modeling and/or ones
that introduce this topic in some detail taught in a
variety of disciplines including: psychology,
education, sociology, the health sciences, and
business. The advanced extensions also make this a
favorite resource for researchers and methodologists in
these disciplines. A basic understanding of ANOVA and
multiple regression is assumed. The section on
multilevel structural equation models assumes a basic
understanding of SEM.},
isbn = {978-1-84872-846-2},
language = {English},
month = apr,
shorttitle = {Multilevel {Analysis}},
year = 2010
}
@Book{cameron_regression_1998,
 Author = {Cameron, A. Colin and Trivedi, Pravin K.},
 Title = {Regression {Analysis} of {Count} {Data}},
 Publisher = {Cambridge University Press},
 Number = {no. 30},
 Series = {Econometric {Society} monographs},
 Address = {Cambridge ; New York},
 isbn = {0-521-63201-3},
 keywords = {Econometrics, Regression analysis},
 year = 1998
}
@Article{greene_2011,
 Author = {Greene, William H.},
 Title = {Fixed {Effects} {Vector} {Decomposition}: A {Magical}
 {Solution} to the {Problem} of {Time-Invariant}
 {Variables} in {Fixed} {Effects} {Models}},
 Journal = {Political Analysis},
 Volume = {19},
 Number = {2},
 Pages = {135--146},
 year = 2011
}
@Article{eilers_flexible_1996,
 Author = {Eilers, Paul H. C. and Marx, Brian D.},
 Title = {Flexible {Smoothing} with {$B$}-splines and
 {Penalties}},
 Journal = {Statistical Science},
 Volume = {11},
 Number = {2},
 Pages = {89--102},
 abstract = {$B$-splines are attractive for nonparametric
 modelling, but choosing the optimal number and
 positions of knots is a complex task. Equidistant knots
 can be used, but their small and discrete number allows
 only limited control over smoothness and fit. We
 propose to use a relatively large number of knots and a
 difference penalty on coefficients of adjacent
 $B$-splines. We show connections to the familiar
 spline penalty on the integral of the squared second
 derivative. A short overview of $B$-splines, of their
 construction and of penalized likelihood is presented.
 We discuss properties of penalized $B$-splines and
 propose various criteria for the choice of an optimal
 penalty parameter. Nonparametric logistic regression,
 density estimation and scatterplot smoothing are used
 as examples. Some details of the computations are
 presented.},
 issn = {0883-4237},
 url = {http://www.jstor.org/stable/2246049},
 urldate = {2016-06-01},
 year = 1996
}
@Article{CroissantPLM,
title = {Panel Data Econometrics in {R}: The {plm} Package},
author = {Yves Croissant and Giovanni Millo},
journal = {Journal of Statistical Software},
year = {2008},
volume = {27},
number = {2},
url = {http://www.jstatsoft.org/v27/i02/},
}
@Article{Bateslme4,
title = {Fitting Linear Mixed-Effects Models Using {lme4}},
author = {Douglas Bates and Martin M{\"a}chler and Ben Bolker and Steve Walker},
journal = {Journal of Statistical Software},
year = {2015},
volume = {67},
number = {1},
pages = {1--48},
doi = {10.18637/jss.v067.i01},
}
@Article{WickhamReshape,
title = {Reshaping Data with the {reshape} Package},
author = {Hadley Wickham},
journal = {Journal of Statistical Software},
year = {2007},
volume = {21},
number = {12},
pages = {1--20},
url = {http://www.jstatsoft.org/v21/i12/},
}
@Article{WickhamPLYR,
title = {The Split-Apply-Combine Strategy for Data Analysis},
author = {Hadley Wickham},
journal = {Journal of Statistical Software},
year = {2011},
volume = {40},
number = {1},
pages = {1--29},
url = {http://www.jstatsoft.org/v40/i01/},
}
@Manual{DowleDataTable,
 title = {data.table: Extension of Data.frame},
 author = {Dowle, M. and Srinivasan, A. and Short, T. and Lianoglou, S.},
 year = {2015},
 note = {R package version 1.9.6; with contributions from R. Saporta and E. Antonyan},
 url = {https://CRAN.R-project.org/package=data.table},
}
@Book{stodden_implementing_2014,
title = {Implementing reproducible research},
publisher = {CRC},
year = {2014},
editor = {Stodden, Victoria and Leisch, Friedrich and Peng, Roger D.},
address = {Boca Raton, FL},
month = apr,
isbn = {978-1-4665-6159-5},
abstract = {In computational science, reproducibility requires that researchers make code and data available to others so that the data can be analyzed in a similar manner as in the original publication. Code must be available to be distributed, data must be accessible in a readable format, and a platform must be available for widely distributing the data and code. In addition, both data and code need to be licensed permissively enough so that others can reproduce the work without a substantial legal burden. Implementing Reproducible Research covers many of the elements necessary for conducting and distributing reproducible research. It explains how to accurately reproduce a scientific result. Divided into three parts, the book discusses the tools, practices, and dissemination platforms for ensuring reproducibility in computational science. It describes: Computational tools, such as Sweave, knitr, VisTrails, Sumatra, CDE, and the Declaratron system Open source practices, good programming practices, trends in open science, and the role of cloud computing in reproducible research Software and methodological platforms, including open source software packages, RunMyCode platform, and open access journals Each part presents contributions from leaders who have developed software and other products that have advanced the field. Supplementary material is available at www.ImplementingRR.org.},
language = {English},
}
@Book{xie_dynamic_2015,
title = {Dynamic Documents with {R} and knitr},
publisher = {CRC},
year = {2015},
author = {Xie, Yihui},
address = {Boca Raton, FL},
edition = {2nd},
abstract = {Quickly and Easily Write Dynamic Documents Suitable for both beginners and advanced users, Dynamic Documents with R and knitr, Second Edition makes writing statistical reports easier by integrating computing directly with reporting. Reports range from homework, projects, exams, books, blogs, and web pages to virtually any documents related to statistical graphics, computing, and data analysis. The book covers basic applications for beginners while guiding power users in understanding the extensibility of the knitr package. New to the Second Edition A new chapter that introduces R Markdown v2 Changes that reflect improvements in the knitr package New sections on generating tables, defining custom printing methods for objects in code chunks, the C/Fortran engines, the Stan engine, running engines in a persistent session, and starting a local server to serve dynamic documents Boost Your Productivity in Statistical Report Writing and Make Your Scientific Computing with R Reproducible Like its highly praised predecessor, this edition shows you how to improve your efficiency in writing reports. The book takes you from program output to publication-quality reports, helping you fine-tune every aspect of your report.},
language = {English},
}
@Book{xie_bookdown:_2016,
title = {bookdown: {A}uthoring books and technical documents with {R} {M}arkdown},
publisher = {CRC},
year = {2016},
author = {Xie, Yihui},
address = {Boca Raton, FL},
month = dec,
abstract = {bookdown: Authoring Books and Technical Documents with R Markdown presents a much easier way to write books and technical publications than traditional tools such as LaTeX and Word. The bookdown package inherits the simplicity of syntax and flexibility for data analysis from R Markdown, and extends R Markdown for technical writing, so that you can make better use of document elements such as figures, tables, equations, theorems, citations, and references. Similar to LaTeX, you can number and cross-reference these elements with bookdown. Your document can even include live examples so readers can interact with them while reading the book. The book can be rendered to multiple output formats, including LaTeX/PDF, HTML, EPUB, and Word, thus making it easy to put your documents online. The style and theme of these output formats can be customized. We used books and R primarily for examples in this book, but bookdown is not only for books or R. Most features introduced in this book also apply to other types of publications: journal papers, reports, dissertations, course handouts, study notes, and even novels. You do not have to use R, either. Other choices of computing languages include Python, C, C++, SQL, Bash, Stan, JavaScript, and so on, although R is best supported. You can also leave out computing, for example, to write a fiction. This book itself is an example of publishing with bookdown and R Markdown, and its source is fully available on GitHub.},
language = {English},
shorttitle = {bookdown},
}
@Manual{xie_knitr,
title = {knitr: A general-purpose package for dynamic report generation in {R}},
author = {Yihui Xie},
year = {2018},
note = {R package version 1.20},
url = {https://yihui.name/knitr},
}
@Manual{Allaire_rmarkdown,
title = {rmarkdown: Dynamic documents for {R}},
author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Aidan Lister},
year = {2018},
note = {R package version 1.10},
url = {https://cran.r-project.org/web/packages/rmarkdown/rmarkdown.pdf},
}
@book{knuth_art_1968,
address = {Reading, Mass},
series = {Addison-{Wesley} series in computer science and information processing},
title = {The art of computer programming},
publisher = {Addison-Wesley Pub. Co},
author = {Knuth, Donald Ervin},
year = {1968},
note = {OCLC: 00474887},
keywords = {Computer programming}
}
@Book{knuth_texbook_1984,
title = {The {TeXbook}},
publisher = {Addison-Wesley},
year = {1984},
author = {Knuth, Donald Ervin},
address = {Reading, MA},
keywords = {Computerized typesetting, Mathematics printing, TeX (Computer file)},
}
@Article{knuth_literate_1984,
author = {Knuth, D. E.},
title = {Literate programming},
journal = {The Computer Journal},
year = {1984},
volume = {27},
pages = {97--111},
doi = {10.1093/comjnl/27.2.97},
}
@book{Dalgaard2002,
  title     = {Introductory statistics with {R}},
  author    = {Dalgaard, Peter},
  series    = {Statistics and computing},
  publisher = {Springer},
  address   = {New York},
  year      = {2002},
  isbn      = {978-0-387-95475-2},
  note      = {OCLC: ocm49386016},
  keywords  = {Data processing, R (Computer program language), Statistics},
}
@incollection{leisch2002,
  author    = {Friedrich Leisch},
  title     = {Sweave: Dynamic generation of statistical reports using literate data analysis},
  booktitle = {Compstat 2002: Proceedings in the 15th {C}omputational {S}tatistics {S}ymposium in {B}erlin},
  editor    = {W. Härdle and B. Rönz},
  publisher = {Physica-Verlag},
  address   = {Heidelberg},
  year      = {2002},
  pages     = {575--580},
}
@manual{MacFarlane,
  title  = {Pandoc user's guide},
  author = {MacFarlane, John},
  year   = {2018},
  url    = {https://pandoc.org/MANUAL.pdf},
}
@manual{bootstrap,
  title    = {Bootstrap},
  author   = {Thornton, Jacob and Otto, Mark},
  year     = {2018},
  abstract = {The most popular HTML, CSS, and JS library in the world.},
  language = {en},
  url      = {https://getbootstrap.com/},
}
@manual{Tantau2016,
  title  = {The {BEAMER} class: User guide for {V}ersion 3.50},
  author = {Tantau, Till and Wright, Joseph and Miletić, Vedran},
  year   = {2016},
  url    = {http://www.ctan.org/tex-archive/macros/latex/contrib/beamer/doc/beameruserguide.pdf},
}
@article{king_logistic_2001,
  title = {Logistic {Regression} in {Rare} {Events} {Data}},
  volume = {9},
  issn = {1047-1987, 1476-4989},
  doi = {10.1093/oxfordjournals.pan.a004868},
  url = {http://pan.oxfordjournals.org/content/9/2/137},
  abstract = {We study rare events data, binary dependent variables with dozens to thousands of times fewer ones (events, such as wars, vetoes, cases of political activism, or epidemiological infections) than zeros ({\textquotedblleft}nonevents{\textquotedblright}). In many literatures, these variables have proven difficult to explain and predict, a problem that seems to have at least two sources. First, popular statistical procedures, such as logistic regression, can sharply underestimate the probability of rare events. We recommend corrections that outperform existing methods and change the estimates of absolute and relative risks by as much as some estimated effects reported in the literature. Second, commonly used data collection strategies are grossly inefficient for rare events data. The fear of collecting data with too few events has led to data collections with huge numbers of observations but relatively few, and poorly measured, explanatory variables, such as in international conflict data with more than a quarter-million dyads, only a few of which are at war. As it turns out, more efficient sampling designs exist for making valid inferences, such as sampling all available events (e.g., wars) and a tiny fraction of nonevents (peace). This enables scholars to save as much as 99\% of their (nonfixed) data collection costs or to collect much more meaningful explanatory variables. We provide methods that link these two results, enabling both types of corrections to work simultaneously, and software that implements the methods developed.},
  language = {en},
  number = {2},
  urldate = {2013-04-12},
  journal = {Political Analysis},
  author = {King, Gary and Zeng, Langche},
  month = jan,
  year = {2001},
  pages = {137--163},
}
@article{firth_bias_1993,
  title = {Bias reduction of maximum likelihood estimates},
  volume = {80},
  issn = {0006-3444, 1464-3510},
  url = {http://biomet.oxfordjournals.org/content/80/1/27},
  doi = {10.1093/biomet/80.1.27},
  abstract = {It is shown how, in regular parametric problems, the first-order term is removed from the asymptotic bias of maximum likelihood estimates by a suitable modification of the score function. In exponential families with canonical parameterization the effect is to penalize the likelihood by the Jeffreys invariant prior. In binomial logistic models, Poisson log linear models and certain other generalized linear models, the Jeffreys prior penalty function can be imposed in standard regression software using a scheme of iterative adjustments to the data.},
  language = {en},
  number = {1},
  urldate = {2013-04-12},
  journal = {Biometrika},
  author = {Firth, David},
  month = mar,
  year = {1993},
  keywords = {Asymptotic bias, Biased estimating equations, Exponential family, Generalized linear model, Jeffreys prior, Logistic regression, Modified score, Penalized likelihood, Shrinkage},
  pages = {27--38},
}
@article{cummings_relative_2009,
  author   = {Cummings, Peter},
  title    = {The {Relative} {Merits} of {Risk} {Ratios} and {Odds} {Ratios}},
  journal  = {Archives of Pediatrics \& Adolescent Medicine},
  year     = {2009},
  month    = may,
  volume   = {163},
  number   = {5},
  pages    = {438--445},
  issn     = {1072-4710},
  doi      = {10.1001/archpediatrics.2009.31},
  url      = {https://jamanetwork.com/journals/jamapediatrics/fullarticle/381459},
  urldate  = {2018-09-18},
  language = {en},
  abstract = {When a study outcome is rare in all strata used for an analysis, the odds ratio estimate of causal effects will approximate the risk ratio; therefore, odds ratios from most case-control studies can be interpreted as risk ratios. However, if a study outcome is common, the odds ratio will be further from 1 than the risk ratio. There is debate regarding the merits of risk ratios compared with odds ratios for the analysis of trials and cohort and cross-sectional studies with common outcomes. Odds ratios are conveniently symmetrical with regard to the outcome definition; the odds ratio for outcome Y is the inverse of the odds ratio for the outcome not Y. Risk ratios lack this symmetry, so it may be necessary to present 1 risk ratio for outcome Y and another for outcome not Y. Risk ratios, but not odds ratios, have a mathematical property called collapsibility; this means that the size of the risk ratio will not change if adjustment is made for a variable that is not a confounder. Because of collapsibility, the risk ratio, assuming no confounding, has a useful interpretation as the ratio change in average risk due to exposure among the exposed. Because odds ratios are not collapsible, they usually lack any interpretation either as the change in average odds or the average change in odds (the average odds ratio).},
}
@article{sainani_understanding_2011,
  title = {Understanding {Odds} {Ratios}},
  volume = {3},
  issn = {1934-1482, 1934-1563},
  url = {https://www.pmrjournal.org/article/S1934-1482(11)00053-0/abstract},
  doi = {10.1016/j.pmrj.2011.01.009},
  language = {English},
  number = {3},
  urldate = {2018-09-18},
  journal = {PM\&R},
  author = {Sainani, Kristin L.},
  month = mar,
  year = {2011},
  pmid = {21402371},
  pages = {263--267},
}
@article{lee_practical_2009,
  author  = {Lee, James and Tan, Kee Seng and Chia, Kee Seng},
  title   = {A {Practical} {Guide} for {Multivariate} {Analysis} of {Dichotomous} {Outcomes}},
  journal = {Annals Academy of Medicine, Singapore},
  year    = {2009},
  volume  = {38},
  number  = {8},
  pages   = {714--719},
}