@article{beyerlein_alternative_2008,
title = {Alternative regression models to assess increase in childhood {BMI}},
volume = {8},
issn = {1471-2288},
url = {http://www.biomedcentral.com/1471-2288/8/59},
doi = {10.1186/1471-2288-8-59},
number = {1},
urldate = {2012-05-16},
journal = {BMC Medical Research Methodology},
author = {Beyerlein, Andreas and Fahrmeir, Ludwig and Mansmann, Ulrich and Toschke, André M.},
year = {2008},
pages = {59},
file = {BMC Medical Research Methodology | Full text | Alternative regression models to assess increase in childhood BMI:/Users/micl/Zotero/storage/KADXBB3S/59.html:text/html}
}
@book{wood_generalized_2006,
title = {Generalized additive models: an introduction with {R}},
volume = {66},
shorttitle = {Generalized additive models},
publisher = {CRC Press},
author = {Wood, S. N.},
year = {2006},
file = {[PDF] from bath.ac.uk:/Users/micl/Zotero/storage/N99I9S57/Wood - 2006 - Generalized additive models an introduction with .pdf:application/pdf;Snapshot:/Users/micl/Zotero/storage/AVVZHAIN/Wood - 2006 - Generalized additive models an introduction with .html:text/html}
}
@book{rasmussen_gaussian_2006,
address = {Cambridge, Mass.},
title = {Gaussian processes for machine learning},
isbn = {0-262-18253-X, 978-0-262-18253-9},
abstract = {"Gaussian processes (GPs) provide a principled, practical, probabilistic approach to learning in kernel machines. GPs have received increased attention in the machine-learning community over the past decade, and this book provides a long-needed systematic and unified treatment of theoretical and practical aspects of GPs in machine learning. The treatment is comprehensive and self-contained, targeted at researchers and students in machine learning and applied statistics."--Jacket.},
language = {English},
publisher = {MIT Press},
author = {Rasmussen, Carl Edward and Williams, Christopher K. I.},
year = {2006}
}
@book{ruppert_semiparametric_2003,
title = {Semiparametric {Regression}},
isbn = {978-0-521-78516-7},
abstract = {Semiparametric regression is concerned with the flexible incorporation of non-linear functional relationships in regression analyses. Any application area that benefits from regression analysis can also benefit from semiparametric regression. Assuming only a basic familiarity with ordinary parametric regression, this user-friendly book explains the techniques and benefits of semiparametric regression in a concise and modular fashion. The authors make liberal use of graphics and examples plus case studies taken from environmental, financial, and other applications. They include practical advice on implementation and pointers to relevant software. The book is suitable as a textbook for students with little background in regression as well as a reference book for statistically oriented scientists such as biostatisticians, econometricians, quantitative social scientists, epidemiologists, with a good working knowledge of regression and the desire to begin using more flexible semiparametric models. Even experts on semiparametric regression should find something new here.},
language = {en},
publisher = {Cambridge University Press},
author = {Ruppert, David and Wand, Matt P. and Carroll, Raymond J.},
month = jul,
year = {2003},
keywords = {Mathematics / Probability \& Statistics / General, Mathematics / General, Regression analysis, Mathematics / Probability \& Statistics / Regression Analysis, Medical / Epidemiology, Nonparametric statistics}
}
@book{fox_nonparametric_2000,
title = {Nonparametric {Simple} {Regression}: {Smoothing} {Scatterplots}},
isbn = {978-0-7619-1585-0},
shorttitle = {Nonparametric {Simple} {Regression}},
abstract = {John Fox introduces readers to the techniques of kernel estimation, additive nonparametric regression, and the ways nonparametric regression can be employed to select transformations of the data preceding a linear least-squares fit.},
language = {en},
publisher = {SAGE},
author = {Fox, John},
month = jan,
year = {2000},
keywords = {Mathematics / Probability \& Statistics / General, Social Science / Research, Regression analysis, Nonparametric statistics, Medical / General, Social Science / General, Social Science / Statistics, Social sciences}
}
@book{fox_multiple_2000,
title = {Multiple and {Generalized} {Nonparametric} {Regression}},
isbn = {978-0-7619-2189-9},
abstract = {This book builds on John Fox's previous volume in the QASS Series, Non Parametric Simple Regression. In this book, the reader learns how to estimate and plot smooth functions when there are multiple independent variables.},
language = {en},
publisher = {SAGE},
author = {Fox, John},
month = may,
year = {2000},
keywords = {Mathematics / Probability \& Statistics / General, Social Science / Research, Regression analysis, Mathematics / Probability \& Statistics / Regression Analysis, Nonparametric statistics, Social Science / General, Social Science / Statistics, Social sciences, Social Science / Methodology, Social sciences - Statistical methods, Social sciences/ Statistical methods}
}
@book{wasserman_all_2006,
title = {All of {Nonparametric} {Statistics}},
isbn = {978-0-387-25145-5},
abstract = {The goal of this text is to provide the reader with a single book where they can find a brief account of many, modern topics in nonparametric inference. The book is aimed at Master's level or Ph.D. level students in statistics, computer science, and engineering. It is also suitable for researchers who want to get up to speed quickly on modern nonparametric methods. This text covers a wide range of topics including: the bootstrap, the nonparametric delta method, nonparametric regression, density estimation, orthogonal function methods, minimax estimation, nonparametric confidence sets, and wavelets. The book has a mixture of methods and theory. From the reviews: "...The book is excellent." (Short Book Reviews of the ISI, June 2006) "Now we have All of Nonparametric Statistics {\ldots} the writing is excellent and the author is to be congratulated on the clarity achieved. {\ldots} the book is excellent." (N.R. Draper, Short Book Reviews, Vol. 26 (1), 2006) "Overall, I enjoyed reading this book very much. I like Wasserman's intuitive explanations and careful insights into why one path or approach is taken over another. Most of all, I am impressed with the wealth of information on the subject of asymptotic nonparametric inferences." (Stergios B. Fotopoulos for Technometrics, Vol. 49, No. 1., February 2007)},
language = {en},
publisher = {Springer},
author = {Wasserman, Larry},
year = {2006},
keywords = {statistics, Mathematics / Probability \& Statistics / General, Mathematics / General, Nonparametric statistics, Artificial intelligence, Computers / Intelligence (AI) \& Semantics, Mathematical statistics}
}
@book{venables_modern_2002,
title = {Modern {Applied} {Statistics} {With} {S}},
isbn = {978-0-387-95457-8},
abstract = {S-PLUS is a powerful environment for the statistical and graphical analysis of data. It provides the tools to implement many statistical ideas which have been made possible by the widespread availability of workstations having good graphics and computational capabilities. This book is a guide to using S-PLUS to perform statistical analyses and provides both an introduction to the use of S-PLUS and a course in modern statistical methods. S-PLUS is available for both Windows and UNIX workstations, and both versions are covered in depth. The aim of the book is to show how to use S-PLUS as a powerful and graphical data analysis system. Readers are assumed to have a basic grounding in statistics, and so the book is intended for would-be users of S-PLUS and both students and researchers using statistics. Throughout, the emphasis is on presenting practical problems and full analyses of real data sets. Many of the methods discussed are state-of-the-art approaches to topics such as linear, nonlinear, and smooth regression models, tree-based methods, multivariate analysis and pattern recognition, survival analysis, time series and spatial statistics. Throughout, modern techniques such as robust methods, non-parametric smoothing, and bootstrapping are used where appropriate. This third edition is intended for users of S-PLUS 4.5, 5.0, 2000 or later, although S-PLUS 3.3/4 are also considered. The major change from the second edition is coverage of the current versions of S-PLUS. The material has been extensively rewritten using new examples and the latest computationally intensive methods. The companion volume on S Programming will provide an in-depth guide for those writing software in the S language. The authors have written several software libraries that enhance S-PLUS; these and all the datasets used are available on the Internet in versions for Windows and UNIX. There are extensive on-line complements covering advanced material, user-contributed extensions, further exercises, and new features of S-PLUS as they are introduced. Dr. Venables is now Statistician with CSIRO in Queensland, having been at the Department of Statistics, University of Adelaide, for many years previously. He has given many short courses on S-PLUS in Australia, Europe, and the USA. Professor Ripley holds the Chair of Applied Statistics at the University of Oxford, and is the author of four other books on spatial statistics, simulation, pattern recognition, and neural networks.},
language = {en},
publisher = {Springer},
author = {Venables, William N. and Ripley, Brian D.},
month = aug,
year = {2002},
keywords = {statistics, Mathematics / Probability \& Statistics / General, Computers / Mathematical \& Statistical Software, Mathematical statistics, Business \& Economics / Statistics, Mathematical statistics - Data processing, Mathematical statistics/ Data processing, S, S (Computer program language), S (Computer system), S-PLUS (Computer program language), Statistics - Data processing, Statistics/ Data processing}
}
@book{hastie_generalized_1990,
title = {Generalized {Additive} {Models}},
isbn = {978-0-412-34390-2},
language = {en},
publisher = {CRC Press},
author = {Hastie, T. J. and Tibshirani, R. J.},
month = jun,
year = {1990},
keywords = {Mathematics / Probability \& Statistics / General}
}
@book{hastie_elements_2009,
edition = {2},
title = {The {Elements} of {Statistical} {Learning}: {Data} {Mining}, {Inference}, and {Prediction}, {Second} {Edition}},
isbn = {0-387-84857-6},
shorttitle = {The {Elements} of {Statistical} {Learning}},
publisher = {Springer},
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
month = feb,
year = {2009}
}
@article{breiman_statistical_2001,
title = {Statistical {Modeling}: {The} {Two} {Cultures} (with comments and a rejoinder by the author)},
volume = {16},
issn = {0883-4237},
shorttitle = {Statistical {Modeling}},
url = {http://projecteuclid.org/euclid.ss/1009213726},
doi = {10.1214/ss/1009213726},
abstract = {There are two cultures in the use of statistical modeling to reach
conclusions from data. One assumes that the data are generated by a given
stochastic data model. The other uses algorithmic models and treats the data
mechanism as unknown. The statistical community has been committed to the
almost exclusive use of data models. This commitment has led to irrelevant
theory, questionable conclusions, and has kept statisticians from working on a
large range of interesting current problems. Algorithmic modeling, both in
theory and practice, has developed rapidly in fields outside statistics. It can
be used both on large complex data sets and as a more accurate and informative
alternative to data modeling on smaller data sets. If our goal as a field is to
use data to solve problems, then we need to move away from exclusive dependence
on data models and adopt a more diverse set of tools.},
number = {3},
urldate = {2012-07-22},
journal = {Statistical Science},
author = {Breiman, Leo},
month = aug,
year = {2001},
note = {Mathematical Reviews number (MathSciNet): MR1874152},
pages = {199--231}
}
@article{rigby_generalized_2005,
title = {Generalized additive models for location, scale and shape},
volume = {54},
issn = {1467-9876},
url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9876.2005.00510.x/abstract},
doi = {10.1111/j.1467-9876.2005.00510.x},
abstract = {Summary. A general class of statistical models for a univariate response variable is presented which we call the generalized additive model for location, scale and shape (GAMLSS). The model assumes independent observations of the response variable y given the parameters, the explanatory variables and the values of the random effects. The distribution for the response variable in the GAMLSS can be selected from a very general family of distributions including highly skew or kurtotic continuous and discrete distributions. The systematic part of the model is expanded to allow modelling not only of the mean (or location) but also of the other parameters of the distribution of y, as parametric and/or additive nonparametric (smooth) functions of explanatory variables and/or random-effects terms. Maximum (penalized) likelihood estimation is used to fit the (non)parametric models. A Newton–Raphson or Fisher scoring algorithm is used to maximize the (penalized) likelihood. The additive terms in the model are fitted by using a backfitting algorithm. Censored data are easily incorporated into the framework. Five data sets from different fields of application are analysed to emphasize the generality of the GAMLSS class of models.},
language = {en},
number = {3},
urldate = {2012-07-13},
journal = {Journal of the Royal Statistical Society: Series C (Applied Statistics)},
author = {Rigby, R. A. and Stasinopoulos, D. M.},
year = {2005},
keywords = {Beta–binomial distribution, Box–Cox transformation, Centile estimation, Cubic smoothing splines, Generalized linear mixed model, LMS method, Negative binomial distribution, Non-normality, Nonparametric models, Overdispersion, Penalized likelihood, Random effects, Skewness and kurtosis},
pages = {507--554},
file = {Full Text PDF:/Users/micl/Zotero/storage/D6PZADQB/Rigby and Stasinopoulos - 2005 - Generalized additive models for location, scale an.pdf:application/pdf;Snapshot:/Users/micl/Zotero/storage/W752GFV4/full.html:text/html}
}
@book{hardin_generalized_2012,
edition = {3},
title = {Generalized {Linear} {Models} and {Extensions}, {Third} {Edition}},
isbn = {1-59718-105-6},
publisher = {Stata Press},
author = {Hardin, James W. and Hilbe, Joseph M.},
month = jun,
year = {2012}
}
@article{friedman_projection_1981,
title = {Projection {Pursuit} {Regression}},
volume = {76},
issn = {0162-1459},
url = {http://www.jstor.org/stable/2287576},
doi = {10.2307/2287576},
abstract = {A new method for nonparametric multiple regression is presented. The procedure models the regression surface as a sum of general smooth functions of linear combinations of the predictor variables in an iterative manner. It is more general than standard stepwise and stagewise regression procedures, does not require the definition of a metric in the predictor space, and lends itself to graphical interpretation.},
number = {376},
urldate = {2012-06-26},
journal = {Journal of the American Statistical Association},
author = {Friedman, Jerome H. and Stuetzle, Werner},
month = dec,
year = {1981},
note = {ArticleType: research-article / Full publication date: Dec., 1981 / Copyright © 1981 American Statistical Association},
pages = {817--823}
}
@book{bybee_pisa_2009,
title = {{PISA} {Science} 2006: {Implications} for {Science} {Teachers} and {Teaching}},
isbn = {978-1-933531-31-1},
shorttitle = {{PISA} {Science} 2006},
language = {en},
publisher = {NSTA Press},
author = {Bybee, Rodger W. and McCrae, Barry},
month = may,
year = {2009},
keywords = {Education / Testing \& Measurement, Education / General, Education / Student Life \& Student Affairs, Education / Teaching Methods \& Materials / Science \& Technology, Educational tests and measurements, High school students, High school students - Rating of, High school students/ Rating of, Programme for International Student Assessment, Science, Science - Study and teaching - United States, Science - Study and teaching (Secondary), Science / Study \& Teaching, Science/ Study and teaching (Secondary)}
}
@book{hardin_generalized_2007,
title = {Generalized linear models and extensions},
publisher = {Stata Corp},
author = {Hardin, J. W. and Hilbe, J.},
year = {2007},
file = {Snapshot:/Users/micl/Zotero/storage/9T3DGIWI/Hardin and Hilbe - 2007 - Generalized linear models and extensions.html:text/html}
}
@article{simpson_modelling_2018,
title = {Modelling {Palaeoecological} {Time} {Series} {Using} {Generalised} {Additive} {Models}},
volume = {6},
issn = {2296-701X},
url = {https://www.frontiersin.org/articles/10.3389/fevo.2018.00149/full},
doi = {10.3389/fevo.2018.00149},
abstract = {In the absence of annual laminations, time series generated from lake sediments or other similar stratigraphic sequences are irregularly spaced in time, which complicates formal analysis using classical statistical time series models. In lieu, statistical analyses of trends in palaeoenvironmental time series, if done at all, have typically used simpler linear regressions or (non-) parametric correlations with little regard for the violation of assumptions that almost surely occurs due to temporal dependencies in the data or that correlations do not provide estimates of the magnitude of change, just whether or not there is a linear or monotonic trend. Alternative approaches have used LOESS-estimated trends to justify data interpretations or test hypotheses as to the causal factors without considering the inherent subjectivity of the choice of parameters used to achieve the LOESS fit (e.g. span width, degree of polynomial). Generalized additive models (GAMs) are statistical models that can be used to estimate trends as smooth functions of time. Unlike LOESS, GAMs use automatic smoothness selection methods to objectively determine the complexity of the fitted trend, and as formal statistical models, GAMs allow for potentially complex, non-linear trends, a proper accounting of model uncertainty, and the identification of periods of significant temporal change. Here, I present a consistent and modern approach to the estimation of trends in palaeoenvironmental time series using GAMs, illustrating features of the methodology with two example time series of contrasting complexity; a 150-year bulk organic matter δ15N time series from Small Water, UK, and a 3000-year alkenone record from Braya-Sø, Greenland. I discuss the underlying mechanics of GAMs that allow them to learn the shape of the trend from the data themselves and how simultaneous confidence intervals and the first derivatives of the trend are used to properly account for model uncertainty and identify periods of change. It is hoped that by using GAMs greater attention is paid to the statistical estimation of trends in palaeoenvironmental time series leading to a more robust and reproducible palaeoscience.},
language = {English},
urldate = {2019-02-10},
journal = {Frontiers in Ecology and Evolution},
author = {Simpson, Gavin L.},
year = {2018},
keywords = {environmental change, generalized additive models, simultaneous interval, Spline, time series},
file = {Full Text PDF:/Users/micl/Zotero/storage/8RLFG7ZV/Simpson - 2018 - Modelling Palaeoecological Time Series Using Gener.pdf:application/pdf}
}
@article{friedman_additive_2000,
title = {Additive logistic regression: a statistical view of boosting ({With} discussion and a rejoinder by the authors)},
volume = {28},
issn = {0090-5364, 2168-8966},
shorttitle = {Additive logistic regression},
url = {https://projecteuclid.org/euclid.aos/1016218223},
doi = {10.1214/aos/1016218223},
abstract = {Boosting is one of the most important recent developments in classification methodology. Boosting works by sequentially applying a classification algorithm to reweighted versions of the training data and then taking a weighted majority vote of the sequence of classifiers thus produced. For many classification algorithms, this simple strategy results in dramatic improvements in performance. We show that this seemingly mysterious phenomenon can be understood in terms of well-known statistical principles, namely additive modeling and maximum likelihood. For the two-class problem, boosting can be viewed as an approximation to additive modeling on the logistic scale using maximum Bernoulli likelihood as a criterion. We develop more direct approximations and show that they exhibit nearly identical results to boosting. Direct multiclass generalizations based on multinomial likelihood are derived that exhibit performance comparable to other recently proposed multiclass generalizations of boosting in most situations, and far superior in some. We suggest a minor modification to boosting that can reduce computation, often by factors of 10 to 50. Finally, we apply these insights to produce an alternative formulation of boosting decision trees. This approach, based on best-first truncated tree induction, often leads to better performance, and can provide interpretable descriptions of the aggregate decision rule. It is also much faster computationally, making it more suitable to large-scale data mining applications.},
language = {EN},
number = {2},
urldate = {2019-02-10},
journal = {The Annals of Statistics},
author = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert},
month = apr,
year = {2000},
mrnumber = {MR1790002},
zmnumber = {1106.62323},
keywords = {classification, machine learning, nonparametric estimation, stagewise fitting, tree},
pages = {337--407},
file = {Full Text PDF:/Users/micl/Zotero/storage/DRP4S38J/Friedman et al. - 2000 - Additive logistic regression a statistical view o.pdf:application/pdf;Snapshot:/Users/micl/Zotero/storage/IU9EH95A/1016218223.html:text/html}
}
@article{agarwal2021neural,
title = {Neural additive models: Interpretable machine learning with neural nets},
author = {Agarwal, Rishabh and Melnick, Levi and Frosst, Nicholas and Zhang, Xuezhou and Lengerich, Ben and Caruana, Rich and Hinton, Geoffrey E.},
journal = {Advances in Neural Information Processing Systems},
volume = {34},
pages = {4699--4711},
year = {2021}
}
@article{xu2022sparse,
title = {Sparse Neural Additive Model: Interpretable Deep Learning with Feature Selection via Group Sparsity},
author = {Xu, Shiyun and Bu, Zhiqi and Chaudhari, Pratik and Barnett, Ian J.},
journal = {arXiv preprint arXiv:2202.12482},
year = {2022}
}