diff --git a/articles/finding-features.html b/articles/finding-features.html
index eb47892..584aa3c 100644
--- a/articles/finding-features.html
+++ b/articles/finding-features.html
@@ -290,7 +290,7 @@ <h2 id="creating-your-own-features">Creating your own Features<a class="anchor"
 <span><span class="co">#&gt; $med</span></span>
 <span><span class="co">#&gt; function (x, na.rm = FALSE, ...) </span></span>
 <span><span class="co">#&gt; UseMethod("median")</span></span>
-<span><span class="co">#&gt; &lt;bytecode: 0x5612f04c6d90&gt;</span></span>
+<span><span class="co">#&gt; &lt;bytecode: 0x558ebdb2bd90&gt;</span></span>
 <span><span class="co">#&gt; &lt;environment: namespace:stats&gt;</span></span>
 <span><span class="co">#&gt; </span></span>
 <span><span class="co">#&gt; $max</span></span>
diff --git a/index.html b/index.html
index 21a3fc8..fc16a03 100644
--- a/index.html
+++ b/index.html
@@ -5,14 +5,14 @@
 <meta charset="utf-8">
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
-<meta name="description" content='Provides a framework of tools to summarise, visualise, and explore longitudinal data. It builds upon the tidy time series data frames used in the tsibble package, and is designed to integrate within the tidyverse, and tidyverts (for time series) ecosystems. The methods implemented include calculating features for understanding longitudinal data, including calculating summary statistics such as quantiles, medians, and numeric ranges, sampling individual series, identifying individual series representative of a group, and extending the facet system in ggplot2 to facilitate exploration of samples of data. These methods are fully described in the paper "brolgar: An R package to Browse Over Longitudinal Data Graphically and Analytically in R", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) &lt;arXiv:2012.01619&gt;.'>
+<meta name="description" content='Provides a framework of tools to summarise, visualise, and explore longitudinal data. It builds upon the tidy time series data frames used in the tsibble package, and is designed to integrate within the tidyverse, and tidyverts (for time series) ecosystems. The methods implemented include calculating features for understanding longitudinal data, including calculating summary statistics such as quantiles, medians, and numeric ranges, sampling individual series, identifying individual series representative of a group, and extending the facet system in ggplot2 to facilitate exploration of samples of data. These methods are fully described in the paper "brolgar: An R package to Browse Over Longitudinal Data Graphically and Analytically in R", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) &lt;doi:10.32614/RJ-2022-023&gt;.'>
 <title>Browse Over Longitudinal Data Graphically and Analytically in R • brolgar</title>
 <script src="deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 <link href="deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet">
 <script src="deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
 <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
 <!-- bootstrap-toc --><script src="https://cdn.jsdelivr.net/gh/afeld/bootstrap-toc@v1.0.1/dist/bootstrap-toc.min.js" integrity="sha256-4veVQbu7//Lk5TSmc7YV48MxtMy98e26cf5MrgZYnwo=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.11/clipboard.min.js" integrity="sha512-7O5pXpc0oCRrxk8RUfDYFgn0nO1t+jLuIOQdOMRp4APB7uZ4vSjspzp5y6YDtDs4VzUSTbWzBFZ/LKJhnyFOKw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="pkgdown.js"></script><meta property="og:title" content="Browse Over Longitudinal Data Graphically and Analytically in R">
-<meta property="og:description" content='Provides a framework of tools to summarise, visualise, and explore longitudinal data. It builds upon the tidy time series data frames used in the tsibble package, and is designed to integrate within the tidyverse, and tidyverts (for time series) ecosystems. The methods implemented include calculating features for understanding longitudinal data, including calculating summary statistics such as quantiles, medians, and numeric ranges, sampling individual series, identifying individual series representative of a group, and extending the facet system in ggplot2 to facilitate exploration of samples of data. These methods are fully described in the paper "brolgar: An R package to Browse Over Longitudinal Data Graphically and Analytically in R", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) &lt;arXiv:2012.01619&gt;.'>
+<meta property="og:description" content='Provides a framework of tools to summarise, visualise, and explore longitudinal data. It builds upon the tidy time series data frames used in the tsibble package, and is designed to integrate within the tidyverse, and tidyverts (for time series) ecosystems. The methods implemented include calculating features for understanding longitudinal data, including calculating summary statistics such as quantiles, medians, and numeric ranges, sampling individual series, identifying individual series representative of a group, and extending the facet system in ggplot2 to facilitate exploration of samples of data. These methods are fully described in the paper "brolgar: An R package to Browse Over Longitudinal Data Graphically and Analytically in R", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) &lt;doi:10.32614/RJ-2022-023&gt;.'>
 <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
@@ -363,7 +363,7 @@ <h1 id="a-note-on-the-api">A Note on the API<a class="anchor" aria-label="anchor
 <div class="section level1">
 <h1 id="acknowledgements">Acknowledgements<a class="anchor" aria-label="anchor" href="#acknowledgements"></a>
 </h1>
-<p>Thank you to <a href="https://blog.mitchelloharawild.com/" class="external-link">Mitchell O’Hara-Wild</a> and <a href="https://earo.me/" class="external-link">Earo Wang</a> for many useful discussions on the implementation of brolgar, as it was heavily inspired by the <a href="https://github.com/tidyverts/feasts" class="external-link"><code>feasts</code></a> package from the <a href="https://tidyverts.org/" class="external-link"><code>tidyverts</code></a>. I would also like to thank <a href="https://researchers.mq.edu.au/en/persons/tania-prvan" class="external-link">Tania Prvan</a> for her valuable early contributions to the project, as well as <a href="https://stuartlee.org/" class="external-link">Stuart Lee</a> for helpful discussions. Thanks also to <a href="https://uschilaa.github.io/" class="external-link">Ursula Laa</a> for her feedback on the package structure and documentation.</p>
+<p>Thank you to <a href="https://mitchelloharawild.com/blog.html" class="external-link">Mitchell O’Hara-Wild</a> and <a href="https://earo.me/" class="external-link">Earo Wang</a> for many useful discussions on the implementation of brolgar, as it was heavily inspired by the <a href="https://github.com/tidyverts/feasts" class="external-link"><code>feasts</code></a> package from the <a href="https://tidyverts.org/" class="external-link"><code>tidyverts</code></a>. I would also like to thank <a href="https://researchers.mq.edu.au/en/persons/tania-prvan" class="external-link">Tania Prvan</a> for her valuable early contributions to the project, as well as <a href="https://stuartlee.org/" class="external-link">Stuart Lee</a> for helpful discussions. Thanks also to <a href="https://uschilaa.github.io/" class="external-link">Ursula Laa</a> for her feedback on the package structure and documentation.</p>
 <!-- These are referred to as a **longnostics**, a portmanteau of **long**itudinal and **cognostic**. These **longnostics** make it straightforward to extract subjects with certain properties to gain some insight into the data.  -->
 <!-- But calculating this for individuals draws you away from your analysis, and instead you are now wrangling with a different problem: summarising key information about each individual and incorporating that back into the data.  -->
 </div>
diff --git a/pkgdown.yml b/pkgdown.yml
index 6e317b0..f3833cd 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -9,7 +9,7 @@ articles:
   longitudinal-data-structures: longitudinal-data-structures.html
   mixed-effects-models: mixed-effects-models.html
   visualisation-gallery: visualisation-gallery.html
-last_built: 2024-05-10T06:44Z
+last_built: 2024-05-10T07:47Z
 urls:
   reference: https://brolgar.njtierney.com/reference
   article: https://brolgar.njtierney.com/articles
diff --git a/reference/figures/README-facet-sample-1.png b/reference/figures/README-facet-sample-1.png
index ee067a0..18597ef 100644
Binary files a/reference/figures/README-facet-sample-1.png and b/reference/figures/README-facet-sample-1.png differ
diff --git a/reference/figures/README-features-left-join-1.png b/reference/figures/README-features-left-join-1.png
index b44b8b9..08612f3 100644
Binary files a/reference/figures/README-features-left-join-1.png and b/reference/figures/README-features-left-join-1.png differ
diff --git a/reference/figures/README-plot-sample-n-keys-1.png b/reference/figures/README-plot-sample-n-keys-1.png
index 62c80c7..50199f5 100644
Binary files a/reference/figures/README-plot-sample-n-keys-1.png and b/reference/figures/README-plot-sample-n-keys-1.png differ
diff --git a/reference/figures/README-show-spaghetti-1.png b/reference/figures/README-show-spaghetti-1.png
index ec4f187..7035785 100644
Binary files a/reference/figures/README-show-spaghetti-1.png and b/reference/figures/README-show-spaghetti-1.png differ
diff --git a/reference/figures/README-show-wages-lg-1.png b/reference/figures/README-show-wages-lg-1.png
index 19b3443..d89766a 100644
Binary files a/reference/figures/README-show-wages-lg-1.png and b/reference/figures/README-show-wages-lg-1.png differ
diff --git a/reference/figures/README-summarise-n-obs-1.png b/reference/figures/README-summarise-n-obs-1.png
index 958a4bf..e7394d2 100644
Binary files a/reference/figures/README-summarise-n-obs-1.png and b/reference/figures/README-summarise-n-obs-1.png differ
diff --git a/search.json b/search.json
index 321295a..307a78a 100644
--- a/search.json
+++ b/search.json
@@ -1 +1 @@
-[{"path":"https://brolgar.njtierney.com/CODE_OF_CONDUCT.html","id":null,"dir":"","previous_headings":"","what":"Contributor Code of Conduct","title":"Contributor Code of Conduct","text":"contributors maintainers project, pledge respect people contribute reporting issues, posting feature requests, updating documentation, submitting pull requests patches, activities. committed making participation project harassment-free experience everyone, regardless level experience, gender, gender identity expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion. Examples unacceptable behavior participants include use sexual language imagery, derogatory comments personal attacks, trolling, public private harassment, insults, unprofessional conduct. Project maintainers right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct. Project maintainers follow Code Conduct may removed project team. Instances abusive, harassing, otherwise unacceptable behavior may reported opening issue contacting one project maintainers. Code Conduct adapted Contributor Covenant (https://www.contributor-covenant.org), version 1.0.0, available https://contributor-covenant.org/version/1/0/0/.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to brolgar","title":"Contributing to brolgar","text":"outlines propose change brolgar. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to brolgar","text":"Small typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES: edit roxygen comment .R file R/. : edit .Rd file man/.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"prerequisites","dir":"","previous_headings":"","what":"Prerequisites","title":"Contributing to brolgar","text":"make substantial pull request, always file issue make sure someone team agrees ’s problem. ’ve found bug, create associated issue illustrate bug minimal reprex.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"","what":"Pull request process","title":"Contributing to brolgar","text":"recommend create Git branch pull request (PR). Look Travis AppVeyor build status making changes. README contain badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat. Contributions test cases included easier accept. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s).","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to brolgar","text":"Please note brolgar project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"see-tidyverse-development-contributing-guide","dir":"","previous_headings":"","what":"See tidyverse development contributing guide","title":"Contributing to brolgar","text":"details.","code":""},{"path":"https://brolgar.njtierney.com/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2019 Nicholas Tierney, Di Cook, Tania Prvan Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://brolgar.njtierney.com/articles/exploratory-modelling.html","id":"find-keys-near-other-summaries-with-keys_near","dir":"Articles","previous_headings":"","what":"Find keys near other summaries with keys_near()","title":"Exploratory Modelling","text":"might want summarise exploratory modelling finding slopes near five number summary values: Finding groups near values can surprisingly challenging! brolgar makes easier providing keys_near() function. tell key , variable want summarise , default returns keys near five number summary. Let’s return keys near .slope_xp: returns id, .slope_xp, statistic closest , difference slope_xp statistic. can visualise summary keys joining back data:  can read keys_near() Identifying interesting observations vignette.","code":"summary(wages_slope$.slope_xp) #>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's  #> -4.57692 -0.00189  0.04519  0.04490  0.08458 13.21569       38 wages_slope %>%   keys_near(key = id,             var = .slope_xp) #> # A tibble: 31 × 5 #>       id .slope_xp stat  stat_value stat_diff #>    <int>     <dbl> <fct>      <dbl>     <dbl> #>  1  2092  -0.00189 q_25    -0.00189         0 #>  2  2092  -0.00189 q_25    -0.00189         0 #>  3  2092  -0.00189 q_25    -0.00189         0 #>  4  2092  -0.00189 q_25    -0.00189         0 #>  5  2092  -0.00189 q_25    -0.00189         0 #>  6  2092  -0.00189 q_25    -0.00189         0 #>  7  6770   0.0846  q_75     0.0846          0 #>  8  6770   0.0846  q_75     0.0846          0 #>  9  6770   0.0846  q_75     0.0846          0 #> 10  6770   0.0846  q_75     0.0846          0 #> # ℹ 21 more rows wages_slope %>%   keys_near(key = id,             var = .slope_xp) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"calculating-features","dir":"Articles","previous_headings":"","what":"Calculating features","title":"Finding Features in Data","text":"can calculate features longitudinal data using features function (fabletools, made available brolgar). features works specifying data, variable summarise, feature calculate: pipe: example, can calculate five number summary (minimum, 25th quantile, median, mean, 75th quantile, maximum) data using feat_five_num, like : taking wages data, piping features, telling summarise ln_wages variable, using feat_five_num. several handy functions calculating features data brolgar provides. start feat_. can, example, find whose values increase decrease feat_monotonic: used identify individuals increase like : joined back data plotted:  get sense data came , create plot gghighlight highlight increase, using gghighlight(increase) - since increase logical, tells gghighlight highlight TRUE.  can explore available features, see function References","code":"features(<DATA>, <VARIABLE>, <FEATURE>) <DATA> %>% features(<VARIABLE>, <FEATURE>) wages_five <- wages %>%   features(ln_wages, feat_five_num)  wages_five #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages_mono <- wages %>%   features(ln_wages, feat_monotonic)  wages_mono #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows library(dplyr) wages_mono %>%   filter(increase) #> # A tibble: 50 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1   206 TRUE     FALSE    FALSE  TRUE      #>  2   295 TRUE     FALSE    FALSE  TRUE      #>  3   518 TRUE     FALSE    FALSE  TRUE      #>  4  1508 TRUE     FALSE    FALSE  TRUE      #>  5  2178 TRUE     FALSE    FALSE  TRUE      #>  6  2194 TRUE     FALSE    FALSE  TRUE      #>  7  2330 TRUE     FALSE    FALSE  TRUE      #>  8  2456 TRUE     FALSE    FALSE  TRUE      #>  9  2612 TRUE     FALSE    FALSE  TRUE      #> 10  2890 TRUE     FALSE    FALSE  TRUE      #> # ℹ 40 more rows wages_mono_join <- wages_mono %>%   filter(increase) %>%   left_join(wages, by = \"id\")  wages_mono_join #> # A tibble: 164 × 13 #>       id increase decrease unvary monotonic ln_wages    xp   ged xp_since_ged #>    <int> <lgl>    <lgl>    <lgl>  <lgl>        <dbl> <dbl> <int>        <dbl> #>  1   206 TRUE     FALSE    FALSE  TRUE          2.03 1.87      0        0     #>  2   206 TRUE     FALSE    FALSE  TRUE          2.30 2.81      0        0     #>  3   206 TRUE     FALSE    FALSE  TRUE          2.48 4.31      0        0     #>  4   295 TRUE     FALSE    FALSE  TRUE          1.79 2.03      0        0     #>  5   295 TRUE     FALSE    FALSE  TRUE          1.81 3.12      0        0     #>  6   295 TRUE     FALSE    FALSE  TRUE          2.11 4.16      0        0     #>  7   295 TRUE     FALSE    FALSE  TRUE          2.13 5.08      0        0     #>  8   295 TRUE     FALSE    FALSE  TRUE          2.31 6.58      0        0     #>  9   518 TRUE     FALSE    FALSE  TRUE          1.27 0.525     1        0.525 #> 10   518 TRUE     FALSE    FALSE  TRUE          1.61 1.93      1        1.93  #> # ℹ 154 more rows #> # ℹ 4 more variables: black <int>, hispanic <int>, high_grade <int>, #> #   unemploy_rate <dbl> ggplot(wages_mono_join,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line() library(gghighlight) wages_mono %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"creating-your-own-features","dir":"Articles","previous_headings":"","what":"Creating your own Features","title":"Finding Features in Data","text":"create features summaries pass features, provide named list functions. example: passed features like : Inside brolgar, features created following syntax: functions b_ functions default na.rm = TRUE, cases quantiles, use type = 8, names = FALSE.","code":"library(brolgar) feat_three <- list(min = min,                    med = median,                    max = max)  feat_three #> $min #> function (..., na.rm = FALSE)  .Primitive(\"min\") #>  #> $med #> function (x, na.rm = FALSE, ...)  #> UseMethod(\"median\") #> <bytecode: 0x5612f04c6d90> #> <environment: namespace:stats> #>  #> $max #> function (..., na.rm = FALSE)  .Primitive(\"max\") wages %>%   features(ln_wages, feat_three) #> # A tibble: 888 × 4 #>       id   min   med   max #>    <int> <dbl> <dbl> <dbl> #>  1    31 1.43   1.73  2.13 #>  2    36 1.80   2.32  2.93 #>  3    53 1.54   1.71  3.24 #>  4   122 0.763  2.19  2.92 #>  5   134 2.00   2.36  2.93 #>  6   145 1.48   1.77  2.04 #>  7   155 1.54   2.22  2.64 #>  8   173 1.56   2.00  2.34 #>  9   206 2.03   2.30  2.48 #> 10   207 1.58   2.15  2.66 #> # ℹ 878 more rows  heights %>%   features(height_cm, feat_three) #> # A tibble: 144 × 4 #>    country       min   med   max #>    <chr>       <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  167.  168. #>  2 Albania      168.  170.  170. #>  3 Algeria      166.  169   171. #>  4 Angola       159.  167.  169. #>  5 Argentina    167.  168.  174. #>  6 Armenia      164.  169.  172. #>  7 Australia    170   172.  178. #>  8 Austria      162.  167.  179. #>  9 Azerbaijan   170.  172.  172. #> 10 Bahrain      161.  164.  164  #> # ℹ 134 more rows feat_five_num <- function(x, ...) {   list(     min = b_min(x, ...),     q25 = b_q25(x, ...),     med = b_median(x, ...),     q75 = b_q75(x, ...),     max = b_max(x, ...)   ) }"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"accessing-sets-of-features","dir":"Articles","previous_headings":"","what":"Accessing sets of features","title":"Finding Features in Data","text":"want run many features package data can collect feature_set. example: run like : information see ?fabletools::feature_set","code":"library(fabletools) feat_brolgar <- feature_set(pkgs = \"brolgar\") length(feat_brolgar) #> [1] 6 wages %>%   features(ln_wages, feat_brolgar) #> # A tibble: 888 × 46 #>       id min...1 med...2 max...3 min...4 q25...5 med...6 q75...7 max...8 min...9 #>    <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> #>  1    31   1.43     1.73    2.13   1.43     1.48    1.73    2.02    2.13   1.43  #>  2    36   1.80     2.32    2.93   1.80     1.97    2.32    2.59    2.93   1.80  #>  3    53   1.54     1.71    3.24   1.54     1.58    1.71    1.89    3.24   1.54  #>  4   122   0.763    2.19    2.92   0.763    2.10    2.19    2.46    2.92   0.763 #>  5   134   2.00     2.36    2.93   2.00     2.28    2.36    2.79    2.93   2.00  #>  6   145   1.48     1.77    2.04   1.48     1.58    1.77    1.89    2.04   1.48  #>  7   155   1.54     2.22    2.64   1.54     1.83    2.22    2.44    2.64   1.54  #>  8   173   1.56     2.00    2.34   1.56     1.68    2.00    2.05    2.34   1.56  #>  9   206   2.03     2.30    2.48   2.03     2.07    2.30    2.45    2.48   2.03  #> 10   207   1.58     2.15    2.66   1.58     1.87    2.15    2.26    2.66   1.58  #> # ℹ 878 more rows #> # ℹ 36 more variables: max...10 <dbl>, range_diff...11 <dbl>, iqr...12 <dbl>, #> #   var...13 <dbl>, sd...14 <dbl>, mad...15 <dbl>, iqr...16 <dbl>, #> #   min...17 <dbl>, max...18 <dbl>, median <dbl>, mean <dbl>, q25...21 <dbl>, #> #   q75...22 <dbl>, range1 <dbl>, range2 <dbl>, range_diff...25 <dbl>, #> #   sd...26 <dbl>, var...27 <dbl>, mad...28 <dbl>, iqr...29 <dbl>, #> #   increase...30 <dbl>, decrease...31 <dbl>, unvary...32 <dbl>, …"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"registering-a-feature-in-a-package","dir":"Articles","previous_headings":"","what":"Registering a feature in a package","title":"Finding Features in Data","text":"create features package want make accessible feature_set, following. Functions can registered via fabletools::register_feature(). register features package, create file called zzz.R, use .onLoad(...) function set loading package:","code":".onLoad <- function(...) {   fabletools::register_feature(feat_three_num, c(\"summary\"))   # ... and as many as you want here! }"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"setting-up-your-data","dir":"Articles","previous_headings":"","what":"Setting up your data","title":"Getting Started","text":"use brolgar work, convert longitudinal data time series tsibble using tsibble package. , need identify unique identifying key, time index. example: learn longitudinal data time series, see vignette: Longitudinal Data Structures.","code":"wages <- as_tsibble(wages,                     key = id,                     index = xp,                     regular = FALSE)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"basic-summaries-of-the-data","dir":"Articles","previous_headings":"","what":"Basic summaries of the data","title":"Getting Started","text":"first get dataset, need get overall sense data.","code":""},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"how-many-observations-are-there","dir":"Articles","previous_headings":"Basic summaries of the data","what":"How many observations are there?","title":"Getting Started","text":"can kind number keys using n_keys(): Note single number, case, 888 observations. However, might want know many observations individual. want number observations variable, can use n_obs() features(). plot can help provide better understanding distribution observations.","code":"n_keys(wages) #> [1] 888 wages %>%   features(ln_wages, n_obs) #> # A tibble: 888 × 2 #>       id n_obs #>    <int> <int> #>  1    31     8 #>  2    36    10 #>  3    53     8 #>  4   122    10 #>  5   134    12 #>  6   145     9 #>  7   155    11 #>  8   173     6 #>  9   206     3 #> 10   207    11 #> # ℹ 878 more rows library(ggplot2) wages %>%   features(ln_wages, n_obs) %>%   ggplot(aes(x = n_obs)) +    geom_bar()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"add_n_obs","dir":"Articles","previous_headings":"Basic summaries of the data > How many observations are there?","what":"add_n_obs()","title":"Getting Started","text":"can add information number observations key add_n_obs(): can use filter() observations: can also look distance experience, understand distribution experience  can explore range experience see common experience ","code":"wages %>% add_n_obs() #> # A tsibble: 6,402 x 10 [!] #> # Key:       id [888] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(dplyr) wages %>%    add_n_obs() %>%   filter(n_obs > 3) #> # A tsibble: 6,145 x 10 [!] #> # Key:       id [764] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,135 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> wages_xp_range <- wages %>%    features(xp,            feat_ranges)  ggplot(wages_xp_range,        aes(x = range_diff)) +    geom_histogram() wages_xp_range %>%    count(range_diff) %>%    mutate(prop = n / sum(n))  #> # A tibble: 829 × 3 #>    range_diff     n    prop #>         <dbl> <int>   <dbl> #>  1     0         38 0.0428  #>  2     0.0150     1 0.00113 #>  3     0.068      1 0.00113 #>  4     0.137      1 0.00113 #>  5     0.153      1 0.00113 #>  6     0.185      1 0.00113 #>  7     0.22       1 0.00113 #>  8     0.225      1 0.00113 #>  9     0.231      1 0.00113 #> 10     0.26       1 0.00113 #> # ℹ 819 more rows"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"efficiently-exploring-longitudinal-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Efficiently exploring longitudinal data","title":"Getting Started","text":"avoid staring plate spaghetti, can look random subset data. Brolgar provides intuitive functions help .","code":""},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"sample_n_keys","dir":"Articles","previous_headings":"Basic summaries of the data > Efficiently exploring longitudinal data","what":"sample_n_keys()","title":"Getting Started","text":"dplyr, can use sample_n() sample n observations. Similarly, brolgar, can take random sample n keys using sample_n_keys():","code":"set.seed(2019-7-15-1300) wages %>%   sample_n_keys(size = 10) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"filtering-observations","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Filtering observations","title":"Getting Started","text":"can combine sample_n_keys() add_n_obs() filter() show keys many observations:  (Note: sample_frac_keys(), samples fraction available keys.) Now, break many plots?","code":"library(dplyr) wages %>%   add_n_obs() %>%   filter(n_obs > 5) %>%   sample_n_keys(size = 10) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"clever-facets-facet_strata","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Clever facets: facet_strata","title":"Getting Started","text":"brolgar provides clever facets help make easier explore data. facet_strata() splits data 12 groups default:  ask split data groups  want show samples per facet?","code":"set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata() set.seed(2019-07-25-1450) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 20)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"clever-facets-facet_sample","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Clever facets: facet_sample","title":"Getting Started","text":"facet_sample() allows specify number keys per facet, number facets n_per_facet n_facets. splits data 12 facets 3 per facet default:  can specify number:  hood, facet_sample() facet_strata() use sample_n_keys() stratify_keys().","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample() set.seed(2019-07-25-1533) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample(n_per_facet = 3,                n_facets = 20)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"exploratory-modelling","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Exploratory modelling","title":"Getting Started","text":"can fit linear model key using key_slope(). returns intercept slope estimate key, given linear model formula. can get number observations, slope information individual identify decreasing time. can join summaries back data: highlight individuals negative slope using gghighlight:","code":"key_slope(wages,ln_wages ~ xp) #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows library(dplyr) wages_slope <- key_slope(wages,ln_wages ~ xp) %>%   left_join(wages, by = \"id\")   wages_slope #> # A tibble: 6,402 × 11 #>       id .intercept .slope_xp ln_wages    xp   ged xp_since_ged black hispanic #>    <int>      <dbl>     <dbl>    <dbl> <dbl> <int>        <dbl> <int>    <int> #>  1    31       1.41    0.101      1.49 0.015     1        0.015     0        1 #>  2    31       1.41    0.101      1.43 0.715     1        0.715     0        1 #>  3    31       1.41    0.101      1.47 1.73      1        1.73      0        1 #>  4    31       1.41    0.101      1.75 2.77      1        2.77      0        1 #>  5    31       1.41    0.101      1.93 3.93      1        3.93      0        1 #>  6    31       1.41    0.101      1.71 4.95      1        4.95      0        1 #>  7    31       1.41    0.101      2.09 5.96      1        5.96      0        1 #>  8    31       1.41    0.101      2.13 6.98      1        6.98      0        1 #>  9    36       2.04    0.0588     1.98 0.315     1        0.315     0        0 #> 10    36       2.04    0.0588     1.80 0.983     1        0.983     0        0 #> # ℹ 6,392 more rows #> # ℹ 2 more variables: high_grade <int>, unemploy_rate <dbl> library(gghighlight)  wages_slope %>%    as_tibble() %>% # workaround for gghighlight + tsibble   ggplot(aes(x = xp,               y = ln_wages,               group = id)) +    geom_line() +   gghighlight(.slope_xp < 0)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"find-keys-near-other-summaries-with-keys_near","dir":"Articles","previous_headings":"Basic summaries of the data > Exploratory modelling","what":"Find keys near other summaries with keys_near","title":"Getting Started","text":"take slope information find individuals representative min, median, maximum, etc growth, using keys_near():","code":"wages_slope %>%   keys_near(key = id,             var = .slope_xp,             funs = l_three_num) #> # A tibble: 13 × 5 #>       id .slope_xp stat  stat_value stat_diff #>    <int>     <dbl> <fct>      <dbl>     <dbl> #>  1  6863    0.0452 med       0.0452         0 #>  2  6863    0.0452 med       0.0452         0 #>  3  6863    0.0452 med       0.0452         0 #>  4  6863    0.0452 med       0.0452         0 #>  5  6863    0.0452 med       0.0452         0 #>  6  6863    0.0452 med       0.0452         0 #>  7  6863    0.0452 med       0.0452         0 #>  8  6863    0.0452 med       0.0452         0 #>  9  7918   -4.58   min      -4.58           0 #> 10  7918   -4.58   min      -4.58           0 #> 11  7918   -4.58   min      -4.58           0 #> 12 12455   13.2    max      13.2            0 #> 13 12455   13.2    max      13.2            0 wages_slope %>%   keys_near(key = id,             var = .slope_xp,             funs = l_three_num) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"finding-features-in-longitudinal-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Finding features in longitudinal data","title":"Getting Started","text":"can extract features longitudinal data using features function, fabletools. can, example, calculate minimum given variable key providing named list like : brolgar provides sets features, start feat_. example, five number summary feat_five_num: finding whose values increase decrease feat_monotonic","code":"wages %>%   features(ln_wages,             list(min = min)) #> # A tibble: 888 × 2 #>       id   min #>    <int> <dbl> #>  1    31 1.43  #>  2    36 1.80  #>  3    53 1.54  #>  4   122 0.763 #>  5   134 2.00  #>  6   145 1.48  #>  7   155 1.54  #>  8   173 1.56  #>  9   206 2.03  #> 10   207 1.58  #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_monotonic) #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"linking-individuals-back-to-the-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Linking individuals back to the data","title":"Getting Started","text":"can join features back data left_join, like :","code":"wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/id-interesting-obs.html","id":"specify-your-own-summaries-for-keys_near","dir":"Articles","previous_headings":"","what":"Specify your own summaries for keys_near","title":"Identify Interesting Observations","text":"can specify list summaries pass keys_near. example, create summaries give sense range. Note functions start b_, b_summaries provided brolgar sensible defaults. can read , ?b_summaries","code":"l_ranges <- list(min = b_min,                 range_diff = b_range_diff,                 max = b_max,                 iqr = b_iqr)  wages %>%  key_slope(formula = ln_wages ~ xp) %>%  keys_near(key = id,            var = .slope_xp,            funs = l_ranges) #> # A tibble: 4 × 5 #>      id .slope_xp stat       stat_value stat_diff #>   <int>     <dbl> <fct>           <dbl>     <dbl> #> 1  7918    -4.58  min            -4.58   0        #> 2  9357     0.103 iqr             0.102  0.000139 #> 3 12455    13.2   range_diff     17.8    4.58     #> 4 12455    13.2   max            13.2    0"},{"path":"https://brolgar.njtierney.com/articles/id-interesting-obs.html","id":"implementation-of-keys_near","dir":"Articles","previous_headings":"","what":"Implementation of keys_near","title":"Identify Interesting Observations","text":"interested specifics keys_near() works, section describes implemented brolgar. get data right format, steps. First, need get data format statistics interested , along id, statistic interest. can fit linear model key dataset using key_slope(). can perform summary statistic interest, case slope. need convert long format can calculate difference stat slope, .slope_xp: stats diff, can group stat, find return rows smallest difference statistic value:  can see get output using keys_near():","code":"wages_slope <- key_slope(wages, ln_wages ~ xp)  wages_slope #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows wages_slope_all_stats <- wages_slope %>%   mutate_at(.vars = vars(.slope_xp),             .funs = list(.slope_min = b_min,                          .slope_max = b_max,                          .slope_median = b_median,                          .slope_q1 = b_q25,                          .slope_q3 = b_q75)) %>%   select(id,          starts_with(\".slope\"))  wages_slope_all_stats #> # A tibble: 888 × 7 #>       id .slope_xp .slope_min .slope_max .slope_median .slope_q1 .slope_q3 #>    <int>     <dbl>      <dbl>      <dbl>         <dbl>     <dbl>     <dbl> #>  1    31    0.101       -4.58       13.2        0.0480  -0.00769    0.0947 #>  2    36    0.0588      -4.58       13.2        0.0480  -0.00769    0.0947 #>  3    53   -0.358       -4.58       13.2        0.0480  -0.00769    0.0947 #>  4   122    0.0374      -4.58       13.2        0.0480  -0.00769    0.0947 #>  5   134    0.0831      -4.58       13.2        0.0480  -0.00769    0.0947 #>  6   145    0.0469      -4.58       13.2        0.0480  -0.00769    0.0947 #>  7   155    0.0867      -4.58       13.2        0.0480  -0.00769    0.0947 #>  8   173    0.100       -4.58       13.2        0.0480  -0.00769    0.0947 #>  9   206    0.180       -4.58       13.2        0.0480  -0.00769    0.0947 #> 10   207    0.0884      -4.58       13.2        0.0480  -0.00769    0.0947 #> # ℹ 878 more rows wages_slope_all_stats_long <-  wages_slope_all_stats %>% gather(key = \"stat\",          value = \"stat_value\",          -id,          -.slope_xp)  wages_slope_all_stats_long #> # A tibble: 4,440 × 4 #>       id .slope_xp stat       stat_value #>    <int>     <dbl> <chr>           <dbl> #>  1    31    0.101  .slope_min      -4.58 #>  2    36    0.0588 .slope_min      -4.58 #>  3    53   -0.358  .slope_min      -4.58 #>  4   122    0.0374 .slope_min      -4.58 #>  5   134    0.0831 .slope_min      -4.58 #>  6   145    0.0469 .slope_min      -4.58 #>  7   155    0.0867 .slope_min      -4.58 #>  8   173    0.100  .slope_min      -4.58 #>  9   206    0.180  .slope_min      -4.58 #> 10   207    0.0884 .slope_min      -4.58 #> # ℹ 4,430 more rows stats_diff <-  wages_slope_all_stats_long %>%   mutate(stat_diff = abs(.slope_xp - stat_value))  stats_diff #> # A tibble: 4,440 × 5 #>       id .slope_xp stat       stat_value stat_diff #>    <int>     <dbl> <chr>           <dbl>     <dbl> #>  1    31    0.101  .slope_min      -4.58      4.68 #>  2    36    0.0588 .slope_min      -4.58      4.64 #>  3    53   -0.358  .slope_min      -4.58      4.22 #>  4   122    0.0374 .slope_min      -4.58      4.61 #>  5   134    0.0831 .slope_min      -4.58      4.66 #>  6   145    0.0469 .slope_min      -4.58      4.62 #>  7   155    0.0867 .slope_min      -4.58      4.66 #>  8   173    0.100  .slope_min      -4.58      4.68 #>  9   206    0.180  .slope_min      -4.58      4.76 #> 10   207    0.0884 .slope_min      -4.58      4.67 #> # ℹ 4,430 more rows top_stats_diff <-  stats_diff %>%   group_by(stat) %>%   top_n(-1,         wt = stat_diff)  top_stats_diff #> # A tibble: 6 × 5 #> # Groups:   stat [5] #>      id .slope_xp stat          stat_value stat_diff #>   <int>     <dbl> <chr>              <dbl>     <dbl> #> 1  7918  -4.58    .slope_min      -4.58    0         #> 2 12455  13.2     .slope_max      13.2     0         #> 3  2305   0.0480  .slope_median    0.0480  0.0000498 #> 4 10380   0.0479  .slope_median    0.0480  0.0000498 #> 5  2594  -0.00768 .slope_q1       -0.00769 0.0000127 #> 6 12178   0.0946  .slope_q3        0.0947  0.0000579 top_stats_diff %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line() wages %>%   key_slope(ln_wages ~ xp) %>%   keys_near(key = id,             var = .slope_xp) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"defining-longitudinal-data-as-a-tsibble","dir":"Articles","previous_headings":"","what":"Defining longitudinal data as a tsibble","title":"Longitudinal Data Structures","text":"tools workflows brolgar designed work special tidy time series data frame called tsibble. can define longitudinal data terms time series gain access really useful tools. , need identify three components: key variable data identifier individual. index variable time component data. regularity time interval (index). Longitudinal data typically irregular time periods measurements, can regular measurements. Together, time index key uniquely identify observation repeated measurements term key used lot brolgar, important idea internalise: key identifier individuals series care defining longitudinal data time series? account time series structure inherent longitudinal data, gain access suite nice tools simplify accelerate work time series data. brolgar built top powerful tsibble package Earo Wang, like learn , see official package documentation read paper.","code":""},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"converting-your-longitudinal-data-to-a-time-series","dir":"Articles","previous_headings":"Defining longitudinal data as a tsibble","what":"Converting your longitudinal data to a time series","title":"Longitudinal Data Structures","text":"convert longitudinal data “time series tibble”, tsibble, need consider variables identify: individual, repeated measurements. key time component, index . regularity time interval (index). Together, time index key uniquely identify observation repeated measurements vignette now walks examples converting longitudinal data tsibble.","code":""},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-data-wages","dir":"Articles","previous_headings":"","what":"example data: wages","title":"Longitudinal Data Structures","text":"Let’s look wages data analysed Singer & Willett (2003). data contains measurements hourly wages years workforce, education race covariates. population measured male high-school dropouts, aged 14 17 years first measured. first 10 rows data. create tsibble data ask, “variables identify…”: key, individual, repeated measurements. index, time component. regularity time interval (index). Together, time index key uniquely identify observation repeated measurements , can say : key variable id - subject id, 1-888. index variable xp experience years individual . data irregular since experience fraction year integer. can use information create tsibble data using as_tsibble Note regular = FALSE, since irregular time series Note following information printed top wages says: 6402 rows, 9 columns. ! top means regular spacing series “key” variable listed - id, 888.","code":"library(brolgar) suppressPackageStartupMessages(library(dplyr)) slice(wages, 1:10) %>% knitr::kable() library(tsibble) as_tsibble(x = wages,            key = id,            index = xp,            regular = FALSE) #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> # A tsibble: 6,402 x 9 [!] # Key:       id [888] ..."},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-heights-data","dir":"Articles","previous_headings":"","what":"example: heights data","title":"Longitudinal Data Structures","text":"heights data little simpler wages data, contains average male heights 144 countries 1810-1989, smaller number countries 1500-1800. contains four variables: country continent year height_cm create tsibble data ask, “variables identify…”: key, individual, repeated measurements. index, time component. regularity time interval (index). case: individual person, country time year year regular measurements fixed year point. data already tsibble object, can create tsibble following code:","code":"as_tsibble(x = heights,            key = country,            index = year,            regular = FALSE) #> # A tsibble: 1,490 x 4 [!] #> # Key:       country [144] #>    country     continent  year height_cm #>    <chr>       <chr>     <dbl>     <dbl> #>  1 Afghanistan Asia       1870      168. #>  2 Afghanistan Asia       1880      166. #>  3 Afghanistan Asia       1930      167. #>  4 Afghanistan Asia       1990      167. #>  5 Afghanistan Asia       2000      161. #>  6 Albania     Europe     1880      170. #>  7 Albania     Europe     1890      170. #>  8 Albania     Europe     1900      169. #>  9 Albania     Europe     2000      168. #> 10 Algeria     Africa     1910      169. #> # ℹ 1,480 more rows"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-gapminder","dir":"Articles","previous_headings":"","what":"example: gapminder","title":"Longitudinal Data Structures","text":"gapminder R package contains dataset subset gapminder study (link). contains data life expectancy, GDP per capita, population country. Let’s identify key, individual, repeated measurements. index, time component. regularity time interval (index). fact similar heights dataset: key country index year identify year regular, can bit data exploration using index_summary() shows us year every five - now know regular longitudinal dataset, can encoded like :","code":"library(gapminder) gapminder #> # A tibble: 1,704 × 6 #>    country     continent  year lifeExp      pop gdpPercap #>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> #>  1 Afghanistan Asia       1952    28.8  8425333      779. #>  2 Afghanistan Asia       1957    30.3  9240934      821. #>  3 Afghanistan Asia       1962    32.0 10267083      853. #>  4 Afghanistan Asia       1967    34.0 11537966      836. #>  5 Afghanistan Asia       1972    36.1 13079460      740. #>  6 Afghanistan Asia       1977    38.4 14880372      786. #>  7 Afghanistan Asia       1982    39.9 12881816      978. #>  8 Afghanistan Asia       1987    40.8 13867957      852. #>  9 Afghanistan Asia       1992    41.7 16317921      649. #> 10 Afghanistan Asia       1997    41.8 22227415      635. #> # ℹ 1,694 more rows gapminder %>%    group_by(country) %>%    index_summary(year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    1952    1966    1980    1980    1993    2007 as_tsibble(gapminder,            key = country,            index = year,            regular = TRUE) #> # A tsibble: 1,704 x 6 [5Y] #> # Key:       country [142] #>    country     continent  year lifeExp      pop gdpPercap #>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> #>  1 Afghanistan Asia       1952    28.8  8425333      779. #>  2 Afghanistan Asia       1957    30.3  9240934      821. #>  3 Afghanistan Asia       1962    32.0 10267083      853. #>  4 Afghanistan Asia       1967    34.0 11537966      836. #>  5 Afghanistan Asia       1972    36.1 13079460      740. #>  6 Afghanistan Asia       1977    38.4 14880372      786. #>  7 Afghanistan Asia       1982    39.9 12881816      978. #>  8 Afghanistan Asia       1987    40.8 13867957      852. #>  9 Afghanistan Asia       1992    41.7 16317921      649. #> 10 Afghanistan Asia       1997    41.8 22227415      635. #> # ℹ 1,694 more rows"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-pisa-data","dir":"Articles","previous_headings":"","what":"example: PISA data","title":"Longitudinal Data Structures","text":"PISA study measures school students around world series math, reading, science scores. subset data looks like : Let’s identify key, individual, repeated measurements. index, time component. regularity time interval (index). looks like key student_id, nested within school_id country, index year, write following can assess regularity year like : can now convert tsibble:","code":"pisa #> # A tibble: 433 × 11 #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl> as_tsibble(pisa,             key = c(country),            index = year) index_regular(pisa, year) #> [1] TRUE index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018 pisa_ts <- as_tsibble(pisa,            key = country,            index = year,            regular = TRUE)  pisa_ts #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Longitudinal Data Structures","text":"idea longitudinal data core brolgar. Understanding longitudinal data , can linked time series representation data helps us understand data structure, gives us access flexible tools. vignettes package show time series tsibble useful.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"exploring-raw-data","dir":"Articles","previous_headings":"","what":"Exploring raw data","title":"Visualisation Gallery","text":"first receive data, want look much raw data possible. section discusses techniques make palatable explore raw data without getting much overplotting.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"select-a-sample-of-individuals","dir":"Articles","previous_headings":"Exploring raw data","what":"Select a sample of individuals","title":"Visualisation Gallery","text":"Sample n random individuals explore (Note: Possibly representative) example, can sample 20 random individuals, plot . (perhaps change sample_n_keys sample_id.)","code":"wages %>%   sample_n_keys(size = 20) #> # A tsibble: 128 x 9 [!] #> # Key:       id [20] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1  2389     2.28 0.154     0        0         0        0         10 #>  2  2389     2.15 1.07      0        0         0        0         10 #>  3  2389     2.30 2.15      0        0         0        0         10 #>  4  2389     1.80 3.06      0        0         0        0         10 #>  5  2389     1.76 3.92      1        0         0        0         10 #>  6  2389     2.00 4.57      1        0.648     0        0         10 #>  7  2389     2.39 5.69      1        1.77      0        0         10 #>  8  2389     2.07 6.57      1        2.65      0        0         10 #>  9  2389     2.20 7.51      1        3.59      0        0         10 #> 10  6269     1.71 2.34      1        1.61      0        0          8 #> # ℹ 118 more rows #> # ℹ 1 more variable: unemploy_rate <dbl>  wages %>%   sample_n_keys(size = 20) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"filter-only-those-with-certain-number-of-observations","dir":"Articles","previous_headings":"Exploring raw data","what":"Filter only those with certain number of observations","title":"Visualisation Gallery","text":"variety number observations data - , many. can filter number observations data using add_n_obs(), adds new column, n_obs, number observations key. can filter data based number observations, combine previous steps sample data using sample_n_keys().","code":"wages %>%   add_n_obs() #> # A tsibble: 6,402 x 10 [!] #> # Key:       id [888] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(dplyr) wages %>%   add_n_obs() %>%   filter(n_obs >= 5) %>%   sample_n_keys(size = 20) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-facet_strata","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets: facet_strata","title":"Visualisation Gallery","text":"brolgar provides clever facets help make easier explore data. facet_strata() splits data 12 groups default:  can control number n_strata:  regular control facet options:","code":"set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata() set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 6) set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 6,                nrow = 3,                ncol = 2)"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-facet_sample","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets: facet_sample","title":"Visualisation Gallery","text":"facet_sample() allows specify number samples per plot , “n per plot” number facets show “n facets”. default splits data 12 facets 3 per group:  allows look larger sample data.","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-with-number-of-observations","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets with number of observations","title":"Visualisation Gallery","text":"can combine add_n_obs() filter() show series 5 observations:  approaches allow view large sections raw data, point individuals “interesting”, sense outliers, representative middle group.","code":"set.seed(2019-07-23-1937) wages %>%   add_n_obs() %>%   filter(n_obs >= 5) %>% ggplot(aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"exploring-data-using-features","dir":"Articles","previous_headings":"","what":"Exploring data using features","title":"Visualisation Gallery","text":"can plot features data first identifying features interest joining back data. details explanation , see vignette, “Finding Features”.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"plot-monotonic-individual-series","dir":"Articles","previous_headings":"Exploring data using features","what":"Plot monotonic individual series","title":"Visualisation Gallery","text":"example, plot whose values increase decrease feat_monotonic gghighlight:  can explore available features, see function References","code":"library(gghighlight) wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"plot-individuals-with-negative-slope","dir":"Articles","previous_headings":"Exploring data using features","what":"Plot individuals with negative slope","title":"Visualisation Gallery","text":"can find individuals negative slope using key_slope. detail key_slope, see Exploratory Modelling vignette. key_slope fits linear model key, returns tibble key columns .intercept .slope_<varname>, explanatory variables. can use gghighlight identify individuals overall negative slope:  positive slope  even facet slope:","code":"wages %>% key_slope(ln_wages ~ xp) #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows library(dplyr) wages_slope <- wages %>%   key_slope(ln_wages ~ xp) %>%   left_join(wages, by = \"id\")  gg_wages_slope <- ggplot(wages_slope,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line()   gg_wages_slope +    gghighlight(.slope_xp < 0) gg_wages_slope +    gghighlight(.slope_xp > 0) gg_wages_slope +    facet_wrap(~.slope_xp > 0)"},{"path":[]},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"visualise-along-slope","dir":"Articles","previous_headings":"Move along features with facet_strata","what":"Visualise along slope","title":"Visualisation Gallery","text":"can use along argument facet_strata() break data according feature. catch data passed must tsibble. example, break data along .slope_xp variable 12 groups, default arranged descending order. groups broken positive slope negative.  along features five number summary:  move along minimum:  move along maximum:  move along median:  hood needs summarisation data arrange like , details implementation helpfile ?facet_strata.","code":"wages_slope <- wages %>%   key_slope(ln_wages ~ xp) %>%   # ensures that we keep the data as a `tsibble`   left_join(x = wages, y = ., by = \"id\")  gg_wages_slope <- ggplot(wages_slope,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line()   gg_wages_slope +   facet_strata(n_strata = 12,                along = .slope_xp) wages_five <- wages %>%     features(ln_wages, feat_five_num) %>%   # ensures that we keep the data as a `tsibble`   left_join(x = wages, y = ., by = \"id\")  wages_five #> # A tsibble: 6,402 x 14 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 6 more variables: unemploy_rate <dbl>, min <dbl>, q25 <dbl>, med <dbl>, #> #   q75 <dbl>, max <dbl> gg_wages_five <- ggplot(wages_five,                          aes(x = xp,                              y = ln_wages,                              group = id)) +                       geom_line()   gg_wages_five gg_wages_five +   facet_strata(n_strata = 12,                along = min) gg_wages_five +   facet_strata(n_strata = 12,                along = max) gg_wages_five +   facet_strata(n_strata = 12,                along = med)"},{"path":"https://brolgar.njtierney.com/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Nicholas Tierney. Author, maintainer. Di Cook. Author. Tania Prvan. Author. Stuart Lee. Contributor. Earo Wang. Contributor.","code":""},{"path":"https://brolgar.njtierney.com/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Tierney N, Cook D, Prvan T (2022). “R Journal: brolgar: R package BRowse Longitudinal Data Graphically Analytically R.” R Journal, 14, 6-25. ISSN 2073-4859, doi:10.32614/RJ-2022-023, https://doi.org/10.32614/RJ-2022-023.","code":"@Article{,   title = {The R Journal: brolgar: An R package to BRowse Over Longitudinal Data Graphically and Analytically in R},   author = {Nicholas Tierney and Di Cook and Tania Prvan},   journal = {The R Journal},   year = {2022},   volume = {14},   issue = {2},   pages = {6-25},   note = {https://doi.org/10.32614/RJ-2022-023},   issn = {2073-4859},   doi = {10.32614/RJ-2022-023}, }"},{"path":"https://brolgar.njtierney.com/index.html","id":"brolgar","dir":"","previous_headings":"","what":"Browse Over Longitudinal Data Graphically and Analytically in R","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"brolgar helps browse longitudinal data graphically analytically R, providing tools : Efficiently explore raw longitudinal data Calculate features (summaries) individuals Evaluate diagnostics statistical models helps go “plate spaghetti” plot left, “interesting observations” plot right.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Install GitHub : R Universe :","code":"# install.packages(\"remotes\") remotes::install_github(\"njtierney/brolgar\") # Enable this universe options(repos = c(     njtierney = 'https://njtierney.r-universe.dev',     CRAN = 'https://cloud.r-project.org')     )  # Install some packages install.packages('brolgar')"},{"path":"https://brolgar.njtierney.com/index.html","id":"using-brolgar-we-need-to-talk-about-data","dir":"","previous_headings":"","what":"Using brolgar: We need to talk about data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"many ways describe longitudinal data - panel data, cross-sectional data, time series. define longitudinal data : individuals repeatedly measured time. tools workflows brolgar designed work special tidy time series data frame called tsibble. can define longitudinal data terms time series gain access really useful tools. , need identify three components: key variable data identifier individual. index variable time component data. regularity time interval (index). Longitudinal data typically irregular time periods measurements, can regular measurements. Together, time index key uniquely identify observation. term key used lot brolgar, important idea internalise: key identifier individuals series Identifying key, index, regularity data can challenge. can learn specifying vignette, “Longitudinal Data Structures”.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"the-wages-data","dir":"","previous_headings":"","what":"The wages data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"wages data example dataset provided brolgar. looks like : hood, created following setup: as_tsibble() takes wages, key, index, state regular = FALSE (since regular time periods measurements). turns data tsibble object - powerful data abstraction made available tsibble package Earo Wang, like learn tsibble, see official package documentation read paper.","code":"wages #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> wages <- as_tsibble(x = wages,                     key = id,                     index = xp,                     regular = FALSE)"},{"path":"https://brolgar.njtierney.com/index.html","id":"efficiently-exploring-longitudinal-data","dir":"","previous_headings":"","what":"Efficiently exploring longitudinal data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Exploring longitudinal data can challenging many individuals. difficult look ! often get “plate spaghetti” plot, many lines plotted top . can avoid spaghetti looking random subset data using tools brolgar.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"sample_n_keys","dir":"","previous_headings":"","what":"sample_n_keys()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"dplyr, can use sample_n() sample n observations, sample_frac() look fraction observations. brolgar builds providing sample_n_keys() sample_frac_keys(). allows take random sample n keys using sample_n_keys(). example:  want create many plots?","code":"set.seed(2019-7-15-1300) wages %>%   sample_n_keys(size = 5) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/index.html","id":"clever-facets-facet_sample","dir":"","previous_headings":"","what":"Clever facets: facet_sample()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"facet_sample() allows specify number keys per facet, number facets n_per_facet n_facets. default, splits data 12 facets 5 per facet:  hood, facet_sample() powered sample_n_keys() stratify_keys(). can see facets (e.g., facet_strata()) data visualisations can make brolgar Visualisation Gallery.","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/index.html","id":"finding-features-in-longitudinal-data","dir":"","previous_headings":"","what":"Finding features in longitudinal data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Sometimes want know range summary variable individual. call summaries features data, can extracted using features function, fabletools. example, want answer question “summary wages individual?”. can use features() find five number summary (min, max, q1, q3, median) ln_wages feat_five_num: returns id, features. many features brolgar - features begin feat_. can, example, find whose ln_wages values increase decrease feat_monotonic: can read creating using features Finding Features vignette. can also see features time series feasts package.","code":"wages %>%   features(ln_wages,            feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_monotonic) #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows"},{"path":"https://brolgar.njtierney.com/index.html","id":"linking-individuals-back-to-the-data","dir":"","previous_headings":"","what":"Linking individuals back to the data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"created features, can join back data left_join, like :","code":"wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: id #> Too many data series, skip labeling"},{"path":[]},{"path":"https://brolgar.njtierney.com/index.html","id":"n_obs","dir":"","previous_headings":"","what":"n_obs()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Return number observations total n_obs():","code":"n_obs(wages) #> n_obs  #>  6402"},{"path":"https://brolgar.njtierney.com/index.html","id":"n_keys","dir":"","previous_headings":"","what":"n_keys()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"number keys data using n_keys():","code":"n_keys(wages) #> [1] 888"},{"path":"https://brolgar.njtierney.com/index.html","id":"finding-the-number-of-observations-per-key","dir":"","previous_headings":"","what":"Finding the number of observations per key.","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"can also use n_obs() inside features return number observations key: returns dataframe, one row per key, number observations key. summarised get sense patterns number observations:","code":"wages %>%   features(ln_wages, n_obs) #> # A tibble: 888 × 2 #>       id n_obs #>    <int> <int> #>  1    31     8 #>  2    36    10 #>  3    53     8 #>  4   122    10 #>  5   134    12 #>  6   145     9 #>  7   155    11 #>  8   173     6 #>  9   206     3 #> 10   207    11 #> # ℹ 878 more rows library(ggplot2) wages %>%   features(ln_wages, n_obs) %>%   ggplot(aes(x = n_obs)) +      geom_bar() wages %>%   features(ln_wages, n_obs) %>%   summary() #>        id            n_obs        #>  Min.   :   31   Min.   : 1.000   #>  1st Qu.: 3332   1st Qu.: 5.000   #>  Median : 6666   Median : 8.000   #>  Mean   : 6343   Mean   : 7.209   #>  3rd Qu.: 9194   3rd Qu.: 9.000   #>  Max.   :12543   Max.   :13.000"},{"path":"https://brolgar.njtierney.com/index.html","id":"further-reading","dir":"","previous_headings":"","what":"Further Reading","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"brolgar provides useful functions explore data, can read exploratory modelling Identify Interesting Observations vignettes. taster, figures can produce:","code":"#> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: id #> Too many data series, skip labeling #> Warning in left_join(., wages, by = \"id\"): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1077 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #>   \"many-to-many\"` to silence this warning."},{"path":"https://brolgar.njtierney.com/index.html","id":"related-work","dir":"","previous_headings":"","what":"Related work","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"One sources inspiration work lasangar R package Bryan Swihart (paper). even expansive time series summarisation, make sure check feasts package (talk!).","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Please note brolgar project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"a-note-on-the-api","dir":"","previous_headings":"","what":"A Note on the API","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"version brolgar forked tprvan/brolgar, undergone breaking changes API.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"acknowledgements","dir":"","previous_headings":"","what":"Acknowledgements","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Thank Mitchell O’Hara-Wild Earo Wang many useful discussions implementation brolgar, heavily inspired feasts package tidyverts. also like thank Tania Prvan valuable early contributions project, well Stuart Lee helpful discussions. Thanks also Ursula Laa feedback package structure documentation.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":null,"dir":"Reference","previous_headings":"","what":"Add the number of observations for each key in a tsibble — add_n_obs","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":", counting number rows dataset, rather counting number observations keys data.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"","code":"add_n_obs(.data, ...)"},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":".data tsibble ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"tsibble n_obs, number observations per key added.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"","code":"library(dplyr) #>  #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #>  #>     filter, lag #> The following objects are masked from ‘package:base’: #>  #>     intersect, setdiff, setequal, union # you can explore the data to see those cases that have exactly two   # observations: heights %>%    add_n_obs() %>%    filter(n_obs == 2) #> # A tsibble: 16 x 5 [!] #> # Key:       country [8] #>    country              year n_obs continent height_cm #>    <chr>               <dbl> <int> <chr>         <dbl> #>  1 Botswana             1910     2 Africa         165. #>  2 Botswana             1980     2 Africa         167. #>  3 Burundi              1920     2 Africa         166. #>  4 Burundi              1930     2 Africa         169. #>  5 Costa Rica           1940     2 Americas       166. #>  6 Costa Rica           1980     2 Americas       174. #>  7 El Salvador          1990     2 Americas       169. #>  8 El Salvador          2000     2 Americas       171. #>  9 Libya                1890     2 Africa         166. #> 10 Libya                1920     2 Africa         165. #> 11 Mongolia             1910     2 Asia           163. #> 12 Mongolia             1930     2 Asia           165. #> 13 Singapore            1970     2 Asia           172. #> 14 Singapore            2000     2 Asia           175. #> 15 Trinidad and Tobago  1980     2 Americas       174. #> 16 Trinidad and Tobago  2000     2 Americas       174."},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":null,"dir":"Reference","previous_headings":"","what":"Brolgar summaries (b_summaries) — b_min","title":"Brolgar summaries (b_summaries) — b_min","text":"Customised summaries vectors appropriate defaults longitudinal data. functions prefixed b_ assist autocomplete. uses na.rm = TRUE , calculations involving quantiles, type = 8 names = FALSE. Summaries include: * b_min: minimum * b_max: maximum * b_median: median * b_mean: mean * b_q25: 25th quantile * b_q75: 75th quantile * b_range: range * b_range_diff: difference range (max - min) * b_sd: standard deviation * b_var: variance * b_mad: mean absolute deviation * b_iqr: Inter-quartile range * b_diff_var: variance diff() * b_diff_sd: standard deviation diff() * b_diff_mean: mean diff() * b_diff_median: median diff() * b_diff_q25: q25 diff() * b_diff_q75: q75 diff()","code":""},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Brolgar summaries (b_summaries) — b_min","text":"","code":"b_min(x, ...)  b_max(x, ...)  b_median(x, ...)  b_mean(x, ...)  b_q25(x, ...)  b_q75(x, ...)  b_range(x, ...)  b_range_diff(x, ...)  b_sd(x, ...)  b_var(x, ...)  b_mad(x, ...)  b_iqr(x, ...)  b_diff_var(x, ...)  b_diff_sd(x, ...)  b_diff_mean(x, ...)  b_diff_median(x, ...)  b_diff_q25(x, ...)  b_diff_q75(x, ...)  b_diff_max(x, ...)  b_diff_min(x, ...)  b_diff_iqr(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Brolgar summaries (b_summaries) — b_min","text":"x vector ... arguments pass","code":""},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Brolgar summaries (b_summaries) — b_min","text":"","code":"x <- c(1:5, NA, 5:1) min(x) #> [1] NA b_min(x) #> [1] 1 max(x) #> [1] NA b_max(x) #> [1] 5 median(x) #> [1] NA b_median(x) #> [1] 3 mean(x) #> [1] NA b_mean(x) #> [1] 3 range(x) #> [1] NA NA b_range(x) #> [1] 1 5 var(x) #> [1] NA b_var(x) #> [1] 2.222222 sd(x) #> [1] NA b_sd(x) #> [1] 1.490712"},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"can calculate series summary statistics (features) given variable dataset. example, three number summary, minimum, median, maximum, can calculated given variable. designed work features() function shown examples. available features brolgar include: feat_three_num() - minimum, median, maximum feat_five_num() - minimum, q25, median, q75, maximum. feat_ranges() - min, max, range difference, interquartile range. feat_spread()  - variance, standard deviation, median absolute distance, interquartile range feat_monotonic() - always increasing, decreasing, unvarying? feat_diff_summary() - summary statistics differences amongst value, including five number summary, well standard deviation variance. Returns NA one observation, take difference one observation, difference 0 cases misleading. feat_brolgar()  features brolgar.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"","code":"feat_three_num(x, ...)  feat_five_num(x, ...)  feat_ranges(x, ...)  feat_spread(x, ...)  feat_monotonic(x, ...)  feat_brolgar(x, ...)  feat_diff_summary(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"x vector extract features . ... arguments passed functions.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"","code":"# You can use any of the features `feat_*` in conjunction with `features`  # like so: heights %>%   features(height_cm, # variable you want to explore            feat_three_num) # the feature summarisation you want to perform #> # A tibble: 144 × 4 #>    country       min   med   max #>    <chr>       <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  167.  168. #>  2 Albania      168.  170.  170. #>  3 Algeria      166.  169   171. #>  4 Angola       159.  167.  169. #>  5 Argentina    167.  168.  174. #>  6 Armenia      164.  169.  172. #>  7 Australia    170   172.  178. #>  8 Austria      162.  167.  179. #>  9 Azerbaijan   170.  172.  172. #> 10 Bahrain      161.  164.  164  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":null,"dir":"Reference","previous_headings":"","what":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"Provides framework tools summarise, visualise, explore longitudinal data. builds upon tidy time series data frames used 'tsibble' package, designed integrate within 'tidyverse', 'tidyverts' (time series) ecosystems. methods implemented include calculating features understanding longitudinal data, including calculating summary statistics quantiles, medians, numeric ranges, sampling individual series, identifying individual series representative group, extending facet system 'ggplot2' facilitate exploration samples data. methods fully described paper \"brolgar: R package Browse Longitudinal Data Graphically Analytically R\", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) arXiv:2012.01619.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"brolgar stands : BRowse Longitudinal data Graphically Analytically R.","code":""},{"path":[]},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"Maintainer: Nicholas Tierney nicholas.tierney@gmail.com (ORCID) Authors: Di Cook dicook@monash.edu (ORCID) Tania Prvan tania.prvan@mq.edu.au contributors: Stuart Lee [contributor] Earo Wang [contributor]","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":null,"dir":"Reference","previous_headings":"","what":"Facet data into groups to facilitate exploration — facet_sample","title":"Facet data into groups to facilitate exploration — facet_sample","text":"function requires tbl_ts object, can created tsibble::as_tsibble(). hood, facet_strata powered stratify_keys() sample_n_keys().","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facet data into groups to facilitate exploration — facet_sample","text":"","code":"facet_sample(   n_per_facet = 3,   n_facets = 12,   nrow = NULL,   ncol = NULL,   scales = \"fixed\",   shrink = TRUE,   strip.position = \"top\" )"},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facet data into groups to facilitate exploration — facet_sample","text":"n_per_facet Number keys per facet want plot. Default 3. n_facets Number facets create. Default 12 nrow, ncol Number rows columns. scales scales fixed (\"fixed\", default), free (\"free\"), free one dimension (\"free_x\", \"free_y\")? shrink TRUE, shrink scales fit output statistics, raw data. FALSE, range raw data statistical summary. strip.position default, labels displayed top plot. Using strip.position possible place labels either four sides setting strip.position = c(\"top\",   \"bottom\", \"left\", \"right\")","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facet data into groups to facilitate exploration — facet_sample","text":"ggplot object","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facet data into groups to facilitate exploration — facet_sample","text":"","code":"library(ggplot2) ggplot(heights, aes(x = year,     y = height_cm,     group = country)) +   geom_line() +   facet_sample()   ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_sample(n_per_facet = 1,                n_facets = 12)"},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":null,"dir":"Reference","previous_headings":"","what":"Facet data into groups to facilitate exploration — facet_strata","title":"Facet data into groups to facilitate exploration — facet_strata","text":"function requires tbl_ts object, can created tsibble::as_tsibble(). hood, facet_strata powered stratify_keys().","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facet data into groups to facilitate exploration — facet_strata","text":"","code":"facet_strata(   n_strata = 12,   along = NULL,   fun = mean,   nrow = NULL,   ncol = NULL,   scales = \"fixed\",   shrink = TRUE,   strip.position = \"top\" )"},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facet data into groups to facilitate exploration — facet_strata","text":"n_strata number groups create along variable stratify along. groups key takes summary statistic (default, mean). arranges mean value key assigns n_strata groups. fun summary function. Default mean. nrow, ncol Number rows columns. scales scales fixed (\"fixed\", default), free (\"free\"), free one dimension (\"free_x\", \"free_y\")? shrink TRUE, shrink scales fit output statistics, raw data. FALSE, range raw data statistical summary. strip.position default, labels displayed top plot. Using strip.position possible place labels either four sides setting strip.position = c(\"top\",   \"bottom\", \"left\", \"right\")","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facet data into groups to facilitate exploration — facet_strata","text":"ggplot object","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facet data into groups to facilitate exploration — facet_strata","text":"","code":"library(ggplot2) ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata()      ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_wrap(~continent)   ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata(along = year)   # \\donttest{ library(dplyr) heights %>%   key_slope(height_cm ~ year) %>%   right_join(heights, ., by = \"country\") %>%   ggplot(aes(x = year,              y = height_cm)) +   geom_line(aes(group = country)) +   geom_smooth(method = \"lm\") +    facet_strata(along = .slope_year) #> `geom_smooth()` using formula = 'y ~ x'  # }"},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":null,"dir":"Reference","previous_headings":"","what":"World Height Data — heights","title":"World Height Data — heights","text":"Average male heights 144 countries 1810-1989, smaller number countries 1500-1800. Data filtered include countries one observation.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"World Height Data — heights","text":"","code":"heights"},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"World Height Data — heights","text":"object class tbl_ts (inherits tbl_df, tbl, data.frame) 1490 rows 4 columns.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"World Height Data — heights","text":"heights stored time series tsibble object. contains variables: country: Country. forms identifying key. year: Year. forms time index. height_cm: Average male height centimeters. continent: continent extracted country name using countrycode package (https://joss.theoj.org/papers/10.21105/joss.00848). information, see article: \"tall others short? Agricultural production proximate determinants global heights\",  Joerg Baten Matthias Blum, European Review Economic History 18 (2014), 144–165. Data available https://datasets.iisg.amsterdam/dataset.xhtml?persistentId=hdl:10622/IAEKLA, accessed via Clio Infra website.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"World Height Data — heights","text":"","code":"# show the data heights #> # A tsibble: 1,490 x 4 [!] #> # Key:       country [144] #>    country     continent  year height_cm #>    <chr>       <chr>     <dbl>     <dbl> #>  1 Afghanistan Asia       1870      168. #>  2 Afghanistan Asia       1880      166. #>  3 Afghanistan Asia       1930      167. #>  4 Afghanistan Asia       1990      167. #>  5 Afghanistan Asia       2000      161. #>  6 Albania     Europe     1880      170. #>  7 Albania     Europe     1890      170. #>  8 Albania     Europe     1900      169. #>  9 Albania     Europe     2000      168. #> 10 Algeria     Africa     1910      169. #> # ℹ 1,480 more rows  # show the spaghetti plot (ugh!) library(ggplot2) ggplot(heights,         aes(x = year,             y = height_cm,             group = country)) +      geom_line()       # Explore all samples with `facet_strata()` ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata()   # Explore the heights over each continent ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_wrap(~continent)     # explore the five number summary of height_cm with `features` heights %>%    features(height_cm, feat_five_num) #> # A tibble: 144 × 6 #>    country       min   q25   med   q75   max #>    <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  164.  167.  168.  168. #>  2 Albania      168.  168.  170.  170.  170. #>  3 Algeria      166.  168.  169   170.  171. #>  4 Angola       159.  160.  167.  168.  169. #>  5 Argentina    167.  168.  168.  170.  174. #>  6 Armenia      164.  166.  169.  172.  172. #>  7 Australia    170   171.  172.  173.  178. #>  8 Austria      162.  164.  167.  169.  179. #>  9 Azerbaijan   170.  171.  172.  172.  172. #> 10 Bahrain      161.  161.  164.  164.  164  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":null,"dir":"Reference","previous_headings":"","what":"Index summaries — index_summary","title":"Index summaries — index_summary","text":"functions check index regular (index_regular()), summarise index variable (index_summary()). can useful check index variables.","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Index summaries — index_summary","text":"","code":"index_regular(.data, ...)  # S3 method for tbl_ts index_regular(.data, ...)  # S3 method for data.frame index_regular(.data, index, ...)  index_summary(.data, ...)  # S3 method for tbl_ts index_summary(.data, ...)  # S3 method for data.frame index_summary(.data, index, ...)"},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Index summaries — index_summary","text":".data data.frame tsibble ... extra arguments index proposed index variable","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Index summaries — index_summary","text":"logical  TRUE means regular, FALSE means ","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Index summaries — index_summary","text":"","code":"# a tsibble index_regular(heights) #> [1] FALSE  # some data frames index_regular(pisa, year) #> [1] TRUE index_regular(airquality, Month) #> [1] TRUE  # a tsibble index_summary(heights) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    1550    1730    1820    1818    1910    2000  # some data frames index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018  index_summary(airquality, Month) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>       5       6       7       7       8       9  index_summary(airquality, Day) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>     1.0     8.5    16.0    16.0    23.5    31.0"},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit linear model for each key — key_slope","title":"Fit linear model for each key — key_slope","text":"Using key_slope can fit linear model key tsibble. add_key_slope adds slope information back data, returns full dimension tsibble.","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit linear model for each key — key_slope","text":"","code":"key_slope(.data, formula, ...)  add_key_slope(.data, formula)  add_key_slope.default(.data, formula)"},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit linear model for each key — key_slope","text":".data tsibble formula formula ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit linear model for each key — key_slope","text":"tibble coefficient information","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit linear model for each key — key_slope","text":"","code":"key_slope(heights, height_cm ~ year) #> # A tibble: 144 × 3 #>    country     .intercept .slope_year #>    <chr>            <dbl>       <dbl> #>  1 Afghanistan      217.      -0.0263 #>  2 Albania          202.      -0.0170 #>  3 Algeria          111.       0.0297 #>  4 Angola            43.9      0.0648 #>  5 Argentina        147.       0.0117 #>  6 Armenia           87.9      0.0419 #>  7 Australia         46.1      0.0665 #>  8 Austria           38.2      0.0695 #>  9 Azerbaijan       150.       0.0111 #> 10 Bahrain         -157.       0.165  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"","code":"# S3 method for data.frame keys_near(.data, key, var, top_n = 1, funs = l_five_num, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":".data data.frame key key, identifies unique observations. var variable summarise top_n top number closest observations return - default 1, also return ties. funs named list functions summarise . Default given list five number summary, l_five_num. ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"","code":"heights %>%   key_slope(height_cm ~ year) %>%   keys_near(key = country,             var = .slope_year) #> # A tibble: 6 × 5 #>   country    .slope_year stat  stat_value stat_diff #>   <chr>            <dbl> <fct>      <dbl>     <dbl> #> 1 Austria         0.0695 q_75      0.0690  0.000515 #> 2 Burundi         0.321  max       0.321   0        #> 3 Eritrea        -0.102  min      -0.102   0        #> 4 Mali            0.0401 med       0.0403  0.000120 #> 5 Spain           0.0404 med       0.0403  0.000120 #> 6 Tajikistan      0.0199 q_25      0.0205  0.000632 # Specify your own list of summaries l_ranges <- list(min = b_min,                  range_diff = b_range_diff,                  max = b_max,                  iqr = b_iqr)  heights %>%   key_slope(formula = height_cm ~ year) %>%   keys_near(key = country,               var = .slope_year,               funs = l_ranges) #> # A tibble: 4 × 5 #>   country     .slope_year stat       stat_value stat_diff #>   <chr>             <dbl> <fct>           <dbl>     <dbl> #> 1 Burundi          0.321  range_diff     0.424    0.102   #> 2 Burundi          0.321  max            0.321    0       #> 3 Eritrea         -0.102  min           -0.102    0       #> 4 Switzerland      0.0496 iqr            0.0485   0.00116"},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"","code":"keys_near(.data, ...)  # S3 method for default keys_near(.data, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near","text":".data tsibble ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"data.frame containing keys closest given statistic.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"","code":"keys_near(heights, height_cm) #> # A tibble: 18 × 5 #>    country          height_cm stat  stat_value stat_diff #>    <chr>                <dbl> <fct>      <dbl>     <dbl> #>  1 Denmark               183. max         183.   0       #>  2 Ethiopia              167. med         167.   0.00900 #>  3 Ghana                 164. q_25        164.   0       #>  4 Hungary               164. q_25        164.   0       #>  5 Italy                 164. q_25        164.   0       #>  6 Italy                 164. q_25        164.   0       #>  7 Liberia               167. med         167.   0.00900 #>  8 Morocco               170. q_75        170.   0.00392 #>  9 Mozambique            164. q_25        164.   0       #> 10 Mozambique            164. q_25        164.   0       #> 11 Pakistan              164. q_25        164.   0       #> 12 Papua New Guinea      152. min         152.   0       #> 13 Romania               164. q_25        164.   0       #> 14 Romania               164. q_25        164.   0       #> 15 Sierra Leone          164. q_25        164.   0       #> 16 Slovakia              164. q_25        164.   0       #> 17 Ukraine               164. q_25        164.   0       #> 18 Ukraine               164. q_25        164.   0"},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"","code":"# S3 method for tbl_ts keys_near(.data, var, top_n = 1, funs = l_five_num, stat_as_factor = TRUE, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":".data tsibble var variable summarise top_n top number closest observations return - default 1, also return ties. funs named list functions summarise . Default given list five number summary, l_five_num. stat_as_factor coerce stat variable factor? Default TRUE. ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"","code":"# Return observations closest to the five number summary of height_cm heights %>%   keys_near(var = height_cm) #> # A tibble: 18 × 5 #>    country          height_cm stat  stat_value stat_diff #>    <chr>                <dbl> <fct>      <dbl>     <dbl> #>  1 Denmark               183. max         183.   0       #>  2 Ethiopia              167. med         167.   0.00900 #>  3 Ghana                 164. q_25        164.   0       #>  4 Hungary               164. q_25        164.   0       #>  5 Italy                 164. q_25        164.   0       #>  6 Italy                 164. q_25        164.   0       #>  7 Liberia               167. med         167.   0.00900 #>  8 Morocco               170. q_75        170.   0.00392 #>  9 Mozambique            164. q_25        164.   0       #> 10 Mozambique            164. q_25        164.   0       #> 11 Pakistan              164. q_25        164.   0       #> 12 Papua New Guinea      152. min         152.   0       #> 13 Romania               164. q_25        164.   0       #> 14 Romania               164. q_25        164.   0       #> 15 Sierra Leone          164. q_25        164.   0       #> 16 Slovakia              164. q_25        164.   0       #> 17 Ukraine               164. q_25        164.   0       #> 18 Ukraine               164. q_25        164.   0"},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":null,"dir":"Reference","previous_headings":"","what":"A named list of the five number summary — l_funs","title":"A named list of the five number summary — l_funs","text":"Designed use keys_near() function.","code":""},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A named list of the five number summary — l_funs","text":"","code":"l_five_num  l_three_num"},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A named list of the five number summary — l_funs","text":"object class list length 5. object class list length 3.","code":""},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A named list of the five number summary — l_funs","text":"","code":"# Specify your own list of summaries l_ranges <- list(min = b_min,                  range_diff = b_range_diff,                  max = b_max,                  iqr = b_iqr)  heights %>%   key_slope(formula = height_cm ~ year) %>%   keys_near(key = country,               var = .slope_year,               funs = l_ranges) #> # A tibble: 4 × 5 #>   country     .slope_year stat       stat_value stat_diff #>   <chr>             <dbl> <fct>           <dbl>     <dbl> #> 1 Burundi          0.321  range_diff     0.424    0.102   #> 2 Burundi          0.321  max            0.321    0       #> 3 Eritrea         -0.102  min           -0.102    0       #> 4 Switzerland      0.0496 iqr            0.0485   0.00116"},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":null,"dir":"Reference","previous_headings":"","what":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"provides three families functions tell values always increasing, decreasing, unvarying, functions, increasing(), decreasing(), unvarying(). hood uses diff find differences, like can pass extra arguments diff.","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"","code":"increasing(x, ...)  decreasing(x, ...)  unvarying(x, ...)  monotonic(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"x numeric integer ... extra arguments pass diff","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"logical TRUE FALSE","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"","code":"vec_inc <- c(1:10) vec_dec<- c(10:1) vec_ran <- c(sample(1:10)) vec_flat <- rep.int(1,10)  increasing(vec_inc) #> [1] TRUE increasing(vec_dec) #> [1] FALSE increasing(vec_ran) #> [1] FALSE increasing(vec_flat) #> [1] FALSE  decreasing(vec_inc) #> [1] FALSE decreasing(vec_dec) #> [1] TRUE decreasing(vec_ran) #> [1] FALSE decreasing(vec_flat) #> [1] FALSE  unvarying(vec_inc) #> [1] FALSE unvarying(vec_dec) #> [1] FALSE unvarying(vec_ran) #> [1] FALSE unvarying(vec_flat) #> [1] TRUE  library(ggplot2) library(gghighlight) library(dplyr)  heights_mono <- heights %>%   features(height_cm, feat_monotonic) %>%   left_join(heights, by = \"country\")      ggplot(heights_mono,          aes(x = year,              y = height_cm,              group = country)) +   geom_line() +    gghighlight(increase) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: country #> Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps    ggplot(heights_mono,         aes(x = year,             y = height_cm,              group = country)) +   geom_line() +    gghighlight(decrease) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: country   heights_mono %>% filter(monotonic) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()     heights_mono %>%   filter(increase) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":null,"dir":"Reference","previous_headings":"","what":"Return the number of observations — n_obs","title":"Return the number of observations — n_obs","text":"Returns number observations vector data.frame. uses vctrs::vec_size() hood.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return the number of observations — n_obs","text":"","code":"n_obs(x, names = TRUE)"},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return the number of observations — n_obs","text":"x vector data.frame names logical; TRUE result named vector named \"n_obs\", else just number observations.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return the number of observations — n_obs","text":"number observations","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"note","dir":"Reference","previous_headings":"","what":"Note","title":"Return the number of observations — n_obs","text":"use n_obs features counting key variable like - features(heights, country, n_obs). Instead, use variable.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return the number of observations — n_obs","text":"","code":"n_obs(iris) #> n_obs  #>   150  n_obs(1:10) #> n_obs  #>    10  add_n_obs(heights) #> # A tsibble: 1,490 x 5 [!] #> # Key:       country [144] #>    country      year n_obs continent height_cm #>    <chr>       <dbl> <int> <chr>         <dbl> #>  1 Afghanistan  1870     5 Asia           168. #>  2 Afghanistan  1880     5 Asia           166. #>  3 Afghanistan  1930     5 Asia           167. #>  4 Afghanistan  1990     5 Asia           167. #>  5 Afghanistan  2000     5 Asia           161. #>  6 Albania      1880     4 Europe         170. #>  7 Albania      1890     4 Europe         170. #>  8 Albania      1900     4 Europe         169. #>  9 Albania      2000     4 Europe         168. #> 10 Algeria      1910     5 Africa         169. #> # ℹ 1,480 more rows heights %>%   features(height_cm, n_obs) # can be any variable except id, the key. #> # A tibble: 144 × 2 #>    country     n_obs #>    <chr>       <int> #>  1 Afghanistan     5 #>  2 Albania         4 #>  3 Algeria         5 #>  4 Angola          9 #>  5 Argentina      20 #>  6 Armenia        11 #>  7 Australia      10 #>  8 Austria        18 #>  9 Azerbaijan      7 #> 10 Bahrain         3 #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":null,"dir":"Reference","previous_headings":"","what":"Return x percent to y percent of values — near_between","title":"Return x percent to y percent of values — near_between","text":"Return x percent y percent values","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return x percent to y percent of values — near_between","text":"","code":"near_between(x, from, to)"},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return x percent to y percent of values — near_between","text":"x numeric vector lower bound percentage upper bound percentage","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return x percent to y percent of values — near_between","text":"logical vector","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return x percent to y percent of values — near_between","text":"","code":"x <- runif(20)  near_middle(x = x,             middle = 0.5,             within = 0.2) #>  [1]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  library(dplyr) heights %>% features(height_cm, list(min = min)) %>%   filter(near_between(min, 0.1, 0.9)) #> # A tibble: 114 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Afghanistan  161. #>  2 Albania      168. #>  3 Algeria      166. #>  4 Argentina    167. #>  5 Armenia      164. #>  6 Austria      162. #>  7 Bahrain      161. #>  8 Bangladesh   160. #>  9 Belarus      164. #> 10 Belgium      163. #> # ℹ 104 more rows  near_quantile(x = x,               probs = 0.5,                tol = 0.01) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  near_quantile(x, c(0.25, 0.5, 0.75), 0.05) #>  [1]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  heights %>%   features(height_cm, l_five_num) %>%   mutate_at(vars(min:max),             .funs = near_quantile,             0.5,              0.01) %>%   filter(min) #> # A tibble: 0 × 6 #> # ℹ 6 variables: country <chr>, min <lgl>, q_25 <lgl>, med <lgl>, q_75 <lgl>, #> #   max <lgl>  heights %>%   features(height_cm, list(min = min)) %>%   mutate(min_near_q3 = near_quantile(min, c(0.25, 0.5, 0.75), 0.01)) %>%   filter(min_near_q3) #> # A tibble: 2 × 3 #>   country      min min_near_q3 #>   <chr>      <dbl> <lgl>       #> 1 Ethiopia    161. TRUE        #> 2 Madagascar  161. TRUE         heights %>%   features(height_cm, list(min = min)) %>%   filter(near_between(min, 0.1, 0.9)) #> # A tibble: 114 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Afghanistan  161. #>  2 Albania      168. #>  3 Algeria      166. #>  4 Argentina    167. #>  5 Armenia      164. #>  6 Austria      162. #>  7 Bahrain      161. #>  8 Bangladesh   160. #>  9 Belarus      164. #> 10 Belgium      163. #> # ℹ 104 more rows  heights %>%   features(height_cm, list(min = min)) %>%   filter(near_middle(min, 0.5, 0.1)) #> # A tibble: 14 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Brazil       164. #>  2 Cameroon     164. #>  3 Estonia      165. #>  4 Gabon        164. #>  5 Ghana        164. #>  6 Guinea       164. #>  7 Kenya        165. #>  8 Kyrgyzstan   164. #>  9 Latvia       165. #> 10 Lithuania    165. #> 11 Netherlands  164. #> 12 Switzerland  165. #> 13 Tajikistan   165. #> 14 Uganda       165."},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":null,"dir":"Reference","previous_headings":"","what":"Return the middle x percent of values — near_middle","title":"Return the middle x percent of values — near_middle","text":"Return middle x percent values","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return the middle x percent of values — near_middle","text":"","code":"near_middle(x, middle, within)"},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return the middle x percent of values — near_middle","text":"x numeric vector middle percentage want center around within percentage around center","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return the middle x percent of values — near_middle","text":"logical vector","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return the middle x percent of values — near_middle","text":"","code":"x <- runif(20) near_middle(x = x,             middle = 0.5,             within = 0.2) #>  [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE #> [13] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE              library(dplyr) heights %>% features(height_cm, list(min = min)) %>%   filter(near_middle(min, 0.5, 0.1)) #> # A tibble: 14 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Brazil       164. #>  2 Cameroon     164. #>  3 Estonia      165. #>  4 Gabon        164. #>  5 Ghana        164. #>  6 Guinea       164. #>  7 Kenya        165. #>  8 Kyrgyzstan   164. #>  9 Latvia       165. #> 10 Lithuania    165. #> 11 Netherlands  164. #> 12 Switzerland  165. #> 13 Tajikistan   165. #> 14 Uganda       165."},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":null,"dir":"Reference","previous_headings":"","what":"Which values are nearest to any given quantiles — near_quantile","title":"Which values are nearest to any given quantiles — near_quantile","text":"values nearest given quantiles","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Which values are nearest to any given quantiles — near_quantile","text":"","code":"near_quantile(x, probs, tol = 0.01)"},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Which values are nearest to any given quantiles — near_quantile","text":"x vector probs quantiles calculate tol tolerance terms x accept near quantile. Default 0.01.","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Which values are nearest to any given quantiles — near_quantile","text":"logical vector TRUE/FALSE number close quantile","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Which values are nearest to any given quantiles — near_quantile","text":"","code":"x <- runif(20) near_quantile(x, 0.5, 0.05) #>  [1] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE near_quantile(x, c(0.25, 0.5, 0.75), 0.05) #>  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE #> [13] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  library(dplyr) heights %>%    features(height_cm, list(min = min)) %>%    mutate(min_near_median = near_quantile(min, 0.5, 0.01)) %>%   filter(min_near_median) #> # A tibble: 0 × 3 #> # ℹ 3 variables: country <chr>, min <dbl>, min_near_median <lgl> heights %>%    features(height_cm, list(min = min)) %>%    mutate(min_near_q3 = near_quantile(min, c(0.25, 0.5, 0.75), 0.01)) %>%   filter(min_near_q3) #> # A tibble: 2 × 3 #>   country      min min_near_q3 #>   <chr>      <dbl> <lgl>       #> 1 Ethiopia    161. TRUE        #> 2 Madagascar  161. TRUE"},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x nearest to y? — nearests","title":"Is x nearest to y? — nearests","text":"Returns TRUE x nearest y. two implementations. nearest_lgl() returns logical vector element first argument nearest element second argument. nearest_qt_lgl() similar nearest_lgl(), instead determines element first argument nearest value given quantile probabilities. See example detail.","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x nearest to y? — nearests","text":"","code":"nearest_lgl(x, y)  nearest_qt_lgl(y, ...)"},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x nearest to y? — nearests","text":"x numeric vector y numeric vector ... (used) arguments pass quantile().","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x nearest to y? — nearests","text":"logical vector length(y)","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x nearest to y? — nearests","text":"","code":"x <- 1:10 y <- 5:14 z <- 16:25 a <- -1:-5 b <- -1  nearest_lgl(x, y) #>  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE nearest_lgl(y, x) #>  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  nearest_lgl(x, z) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE nearest_lgl(z, x) #>  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  nearest_lgl(x, a) #> [1]  TRUE FALSE FALSE FALSE FALSE nearest_lgl(a, x) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  nearest_lgl(x, b) #> [1] TRUE nearest_lgl(b, x) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  library(dplyr) heights_near_min <- heights %>%   filter(nearest_lgl(min(height_cm), height_cm))    heights_near_fivenum <- heights %>%   filter(nearest_lgl(fivenum(height_cm), height_cm))    heights_near_qt_1 <- heights %>%   filter(nearest_qt_lgl(height_cm, c(0.5)))    heights_near_qt_3 <- heights %>%   filter(nearest_qt_lgl(height_cm, c(0.1, 0.5, 0.9)))"},{"path":"https://brolgar.njtierney.com/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://brolgar.njtierney.com/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% rhs"},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":null,"dir":"Reference","previous_headings":"","what":"Student data from 2000-2018 PISA OECD data — pisa","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"subset PISA data, containing scores information triennial testing 15 year olds around globe. Original data available https://www.oecd.org/pisa/data/. Data derived https://github.com/kevinwang09/learningtower.","code":""},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"","code":"pisa"},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"tibble following variables year year measurement country three letter country code. data contains Australia, New Zealand, Indonesia. full data learningtower contains 99 countries. school_id unique school identification number student_id student identification number gender recorded gender - 1 female 2 male missing math Simulated score mathematics read Simulated score reading science Simulated score science stu_wgt final survey weight score student score Understanding bit PISA data, school_id student_id unique across time. means longitudinal element country within given year. can cast pisa tsibble, need aggregate data year country. , important provide summary statistics scores - want include mean, minimum maximum math, reading, science scores, lose information individuals. example code , first grouping year country, calculating weighted mean math, reading, science. can done using student weight variable stu_wgt, get survey weighted mean. minimum maximum calculated.","code":""},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"","code":"pisa #> # A tibble: 433 × 11 #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>  library(dplyr) # Let's identify  #1.  The **key**, the individual, who would have repeated measurements.  #2.  The **index**, the time component. #3.  The **regularity** of the time interval (index).   # Here it looks like the key is the student_id, which is nested within # school_id #' and country,  # And the index is year, so we would write the following  as_tsibble(pisa,             key = country,            index = year) #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>  # We can assess the regularity of the year like so:  index_regular(pisa, year) #> [1] TRUE index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018   # We can now convert this into a `tsibble`:  pisa_ts <- as_tsibble(pisa,            key = country,            index = year,            regular = TRUE)  pisa_ts #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl> pisa_ts_au_nz <- pisa_ts %>% filter(country %in% c(\"AUS\", \"NZL\", \"QAT\"))  library(ggplot2) ggplot(pisa_ts_au_nz,         aes(x = year,             y = math_mean,            group = country,            colour = country)) +   geom_ribbon(aes(ymin = math_min,                    ymax = math_max),                fill = \"grey70\") +   geom_line(size = 1) +   lims(y = c(0, 1000)) +   labs(y = \"math\") + facet_wrap(~country) #> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. #> ℹ Please use `linewidth` instead."},{"path":"https://brolgar.njtierney.com/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. fabletools features, features_all, features_at, features_if tsibble as_tsibble, n_keys","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample a number or fraction of keys to explore — sample-n-frac-keys","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"Sample number fraction keys explore","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"","code":"sample_n_keys(.data, size)  sample_frac_keys(.data, size)"},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":".data tsibble object size number fraction observations, depending function used. sample_n_keys, number > 0, sample_frac_keys fraction, 0 1.","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"tsibble fewer observations key","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"","code":"library(ggplot2) sample_n_keys(heights,              size = 10) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()  library(ggplot2) sample_frac_keys(wages,                 0.1) %>%   ggplot(aes(x = xp,              y = unemploy_rate,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":null,"dir":"Reference","previous_headings":"","what":"Stratify the keys into groups to facilitate exploration — stratify_keys","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"look much raw data possible, can helpful stratify data groups plotting. can stratify keys using stratify_keys() function, adds column, .strata. allows user create facetted plots showing raw data.","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"","code":"stratify_keys(.data, n_strata, along = NULL, fun = mean, ...)"},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":".data data.frame explore n_strata number groups create along variable stratify along. groups key takes summary statistic (default, mean). arranges mean value key assigns n_strata groups. fun summary function. Default mean. ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"data.frame column, .strata containing n_strata groups","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"","code":"library(ggplot2) library(brolgar)  heights %>%   sample_frac_keys(size = 0.1) %>%   stratify_keys(10) %>%  ggplot(aes(x = height_cm,             y = year,             group = country)) +   geom_line() +   facet_wrap(~.strata)     # now facet along some feature library(dplyr)  heights %>% key_slope(height_cm ~ year) %>%   right_join(heights, ., by = \"country\") %>%   stratify_keys(n_strata = 12,                 along = .slope_year,                 fun = median) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line() +    facet_wrap(~.strata)    heights %>%   stratify_keys(n_strata = 12,                 along = height_cm) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line() +    facet_wrap(~.strata)"},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":null,"dir":"Reference","previous_headings":"","what":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"data contains measurements hourly wages years workforce, education race covariates. population measured male high-school dropouts, aged 14 17 years first measured. wages time series tsibble. comes J. D. Singer J. B. Willett. Applied Longitudinal Data Analysis. Oxford University Press, Oxford, UK, 2003. https://stats.idre.ucla.edu/stat/r/examples/alda/data/wages_pp.txt","code":""},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"","code":"wages"},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"tsibble data frame 6402 rows 8 variables: id 1–888, subject. forms key data ln_wages natural log wages, adjusted inflation, 1990 dollars. xp Experience - length time workforce (years). treated time variable, t0 subject starting first day work. number time points values time points subject can differ. forms index data ged /graduate equivalency diploma obtained. xp_since_ged change experience since getting ged (get one) black categorical indicator race = black. hispanic categorical indicator race = hispanic. high_grade highest grade completed unemploy_rate unemployment rates local geographic region measurement time","code":""},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"","code":"# show the data wages #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(ggplot2) # set seed so that the plots stay the same set.seed(2019-7-15-1300) # explore a sample of five individuals wages %>%   sample_n_keys(size = 5) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()   # Explore many samples with `facet_sample()`   ggplot(wages,           aes(x = xp,              y = ln_wages,              group = id)) +    geom_line() +    facet_sample()   # explore the five number summary of ln_wages with `features` wages %>%    features(ln_wages, feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows"}]
+[{"path":"https://brolgar.njtierney.com/CODE_OF_CONDUCT.html","id":null,"dir":"","previous_headings":"","what":"Contributor Code of Conduct","title":"Contributor Code of Conduct","text":"contributors maintainers project, pledge respect people contribute reporting issues, posting feature requests, updating documentation, submitting pull requests patches, activities. committed making participation project harassment-free experience everyone, regardless level experience, gender, gender identity expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion. Examples unacceptable behavior participants include use sexual language imagery, derogatory comments personal attacks, trolling, public private harassment, insults, unprofessional conduct. Project maintainers right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct. Project maintainers follow Code Conduct may removed project team. Instances abusive, harassing, otherwise unacceptable behavior may reported opening issue contacting one project maintainers. Code Conduct adapted Contributor Covenant (https://www.contributor-covenant.org), version 1.0.0, available https://contributor-covenant.org/version/1/0/0/.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to brolgar","title":"Contributing to brolgar","text":"outlines propose change brolgar. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to brolgar","text":"Small typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES: edit roxygen comment .R file R/. : edit .Rd file man/.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"prerequisites","dir":"","previous_headings":"","what":"Prerequisites","title":"Contributing to brolgar","text":"make substantial pull request, always file issue make sure someone team agrees ’s problem. ’ve found bug, create associated issue illustrate bug minimal reprex.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"","what":"Pull request process","title":"Contributing to brolgar","text":"recommend create Git branch pull request (PR). Look Travis AppVeyor build status making changes. README contain badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat. Contributions test cases included easier accept. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s).","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to brolgar","text":"Please note brolgar project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://brolgar.njtierney.com/CONTRIBUTING.html","id":"see-tidyverse-development-contributing-guide","dir":"","previous_headings":"","what":"See tidyverse development contributing guide","title":"Contributing to brolgar","text":"details.","code":""},{"path":"https://brolgar.njtierney.com/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2019 Nicholas Tierney, Di Cook, Tania Prvan Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://brolgar.njtierney.com/articles/exploratory-modelling.html","id":"find-keys-near-other-summaries-with-keys_near","dir":"Articles","previous_headings":"","what":"Find keys near other summaries with keys_near()","title":"Exploratory Modelling","text":"might want summarise exploratory modelling finding slopes near five number summary values: Finding groups near values can surprisingly challenging! brolgar makes easier providing keys_near() function. tell key , variable want summarise , default returns keys near five number summary. Let’s return keys near .slope_xp: returns id, .slope_xp, statistic closest , difference slope_xp statistic. can visualise summary keys joining back data:  can read keys_near() Identifying interesting observations vignette.","code":"summary(wages_slope$.slope_xp) #>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's  #> -4.57692 -0.00189  0.04519  0.04490  0.08458 13.21569       38 wages_slope %>%   keys_near(key = id,             var = .slope_xp) #> # A tibble: 31 × 5 #>       id .slope_xp stat  stat_value stat_diff #>    <int>     <dbl> <fct>      <dbl>     <dbl> #>  1  2092  -0.00189 q_25    -0.00189         0 #>  2  2092  -0.00189 q_25    -0.00189         0 #>  3  2092  -0.00189 q_25    -0.00189         0 #>  4  2092  -0.00189 q_25    -0.00189         0 #>  5  2092  -0.00189 q_25    -0.00189         0 #>  6  2092  -0.00189 q_25    -0.00189         0 #>  7  6770   0.0846  q_75     0.0846          0 #>  8  6770   0.0846  q_75     0.0846          0 #>  9  6770   0.0846  q_75     0.0846          0 #> 10  6770   0.0846  q_75     0.0846          0 #> # ℹ 21 more rows wages_slope %>%   keys_near(key = id,             var = .slope_xp) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"calculating-features","dir":"Articles","previous_headings":"","what":"Calculating features","title":"Finding Features in Data","text":"can calculate features longitudinal data using features function (fabletools, made available brolgar). features works specifying data, variable summarise, feature calculate: pipe: example, can calculate five number summary (minimum, 25th quantile, median, mean, 75th quantile, maximum) data using feat_five_num, like : taking wages data, piping features, telling summarise ln_wages variable, using feat_five_num. several handy functions calculating features data brolgar provides. start feat_. can, example, find whose values increase decrease feat_monotonic: used identify individuals increase like : joined back data plotted:  get sense data came , create plot gghighlight highlight increase, using gghighlight(increase) - since increase logical, tells gghighlight highlight TRUE.  can explore available features, see function References","code":"features(<DATA>, <VARIABLE>, <FEATURE>) <DATA> %>% features(<VARIABLE>, <FEATURE>) wages_five <- wages %>%   features(ln_wages, feat_five_num)  wages_five #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages_mono <- wages %>%   features(ln_wages, feat_monotonic)  wages_mono #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows library(dplyr) wages_mono %>%   filter(increase) #> # A tibble: 50 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1   206 TRUE     FALSE    FALSE  TRUE      #>  2   295 TRUE     FALSE    FALSE  TRUE      #>  3   518 TRUE     FALSE    FALSE  TRUE      #>  4  1508 TRUE     FALSE    FALSE  TRUE      #>  5  2178 TRUE     FALSE    FALSE  TRUE      #>  6  2194 TRUE     FALSE    FALSE  TRUE      #>  7  2330 TRUE     FALSE    FALSE  TRUE      #>  8  2456 TRUE     FALSE    FALSE  TRUE      #>  9  2612 TRUE     FALSE    FALSE  TRUE      #> 10  2890 TRUE     FALSE    FALSE  TRUE      #> # ℹ 40 more rows wages_mono_join <- wages_mono %>%   filter(increase) %>%   left_join(wages, by = \"id\")  wages_mono_join #> # A tibble: 164 × 13 #>       id increase decrease unvary monotonic ln_wages    xp   ged xp_since_ged #>    <int> <lgl>    <lgl>    <lgl>  <lgl>        <dbl> <dbl> <int>        <dbl> #>  1   206 TRUE     FALSE    FALSE  TRUE          2.03 1.87      0        0     #>  2   206 TRUE     FALSE    FALSE  TRUE          2.30 2.81      0        0     #>  3   206 TRUE     FALSE    FALSE  TRUE          2.48 4.31      0        0     #>  4   295 TRUE     FALSE    FALSE  TRUE          1.79 2.03      0        0     #>  5   295 TRUE     FALSE    FALSE  TRUE          1.81 3.12      0        0     #>  6   295 TRUE     FALSE    FALSE  TRUE          2.11 4.16      0        0     #>  7   295 TRUE     FALSE    FALSE  TRUE          2.13 5.08      0        0     #>  8   295 TRUE     FALSE    FALSE  TRUE          2.31 6.58      0        0     #>  9   518 TRUE     FALSE    FALSE  TRUE          1.27 0.525     1        0.525 #> 10   518 TRUE     FALSE    FALSE  TRUE          1.61 1.93      1        1.93  #> # ℹ 154 more rows #> # ℹ 4 more variables: black <int>, hispanic <int>, high_grade <int>, #> #   unemploy_rate <dbl> ggplot(wages_mono_join,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line() library(gghighlight) wages_mono %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"creating-your-own-features","dir":"Articles","previous_headings":"","what":"Creating your own Features","title":"Finding Features in Data","text":"create features summaries pass features, provide named list functions. example: passed features like : Inside brolgar, features created following syntax: functions b_ functions default na.rm = TRUE, cases quantiles, use type = 8, names = FALSE.","code":"library(brolgar) feat_three <- list(min = min,                    med = median,                    max = max)  feat_three #> $min #> function (..., na.rm = FALSE)  .Primitive(\"min\") #>  #> $med #> function (x, na.rm = FALSE, ...)  #> UseMethod(\"median\") #> <bytecode: 0x558ebdb2bd90> #> <environment: namespace:stats> #>  #> $max #> function (..., na.rm = FALSE)  .Primitive(\"max\") wages %>%   features(ln_wages, feat_three) #> # A tibble: 888 × 4 #>       id   min   med   max #>    <int> <dbl> <dbl> <dbl> #>  1    31 1.43   1.73  2.13 #>  2    36 1.80   2.32  2.93 #>  3    53 1.54   1.71  3.24 #>  4   122 0.763  2.19  2.92 #>  5   134 2.00   2.36  2.93 #>  6   145 1.48   1.77  2.04 #>  7   155 1.54   2.22  2.64 #>  8   173 1.56   2.00  2.34 #>  9   206 2.03   2.30  2.48 #> 10   207 1.58   2.15  2.66 #> # ℹ 878 more rows  heights %>%   features(height_cm, feat_three) #> # A tibble: 144 × 4 #>    country       min   med   max #>    <chr>       <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  167.  168. #>  2 Albania      168.  170.  170. #>  3 Algeria      166.  169   171. #>  4 Angola       159.  167.  169. #>  5 Argentina    167.  168.  174. #>  6 Armenia      164.  169.  172. #>  7 Australia    170   172.  178. #>  8 Austria      162.  167.  179. #>  9 Azerbaijan   170.  172.  172. #> 10 Bahrain      161.  164.  164  #> # ℹ 134 more rows feat_five_num <- function(x, ...) {   list(     min = b_min(x, ...),     q25 = b_q25(x, ...),     med = b_median(x, ...),     q75 = b_q75(x, ...),     max = b_max(x, ...)   ) }"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"accessing-sets-of-features","dir":"Articles","previous_headings":"","what":"Accessing sets of features","title":"Finding Features in Data","text":"want run many features package data can collect feature_set. example: run like : information see ?fabletools::feature_set","code":"library(fabletools) feat_brolgar <- feature_set(pkgs = \"brolgar\") length(feat_brolgar) #> [1] 6 wages %>%   features(ln_wages, feat_brolgar) #> # A tibble: 888 × 46 #>       id min...1 med...2 max...3 min...4 q25...5 med...6 q75...7 max...8 min...9 #>    <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> #>  1    31   1.43     1.73    2.13   1.43     1.48    1.73    2.02    2.13   1.43  #>  2    36   1.80     2.32    2.93   1.80     1.97    2.32    2.59    2.93   1.80  #>  3    53   1.54     1.71    3.24   1.54     1.58    1.71    1.89    3.24   1.54  #>  4   122   0.763    2.19    2.92   0.763    2.10    2.19    2.46    2.92   0.763 #>  5   134   2.00     2.36    2.93   2.00     2.28    2.36    2.79    2.93   2.00  #>  6   145   1.48     1.77    2.04   1.48     1.58    1.77    1.89    2.04   1.48  #>  7   155   1.54     2.22    2.64   1.54     1.83    2.22    2.44    2.64   1.54  #>  8   173   1.56     2.00    2.34   1.56     1.68    2.00    2.05    2.34   1.56  #>  9   206   2.03     2.30    2.48   2.03     2.07    2.30    2.45    2.48   2.03  #> 10   207   1.58     2.15    2.66   1.58     1.87    2.15    2.26    2.66   1.58  #> # ℹ 878 more rows #> # ℹ 36 more variables: max...10 <dbl>, range_diff...11 <dbl>, iqr...12 <dbl>, #> #   var...13 <dbl>, sd...14 <dbl>, mad...15 <dbl>, iqr...16 <dbl>, #> #   min...17 <dbl>, max...18 <dbl>, median <dbl>, mean <dbl>, q25...21 <dbl>, #> #   q75...22 <dbl>, range1 <dbl>, range2 <dbl>, range_diff...25 <dbl>, #> #   sd...26 <dbl>, var...27 <dbl>, mad...28 <dbl>, iqr...29 <dbl>, #> #   increase...30 <dbl>, decrease...31 <dbl>, unvary...32 <dbl>, …"},{"path":"https://brolgar.njtierney.com/articles/finding-features.html","id":"registering-a-feature-in-a-package","dir":"Articles","previous_headings":"","what":"Registering a feature in a package","title":"Finding Features in Data","text":"create features package want make accessible feature_set, following. Functions can registered via fabletools::register_feature(). register features package, create file called zzz.R, use .onLoad(...) function set loading package:","code":".onLoad <- function(...) {   fabletools::register_feature(feat_three_num, c(\"summary\"))   # ... and as many as you want here! }"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"setting-up-your-data","dir":"Articles","previous_headings":"","what":"Setting up your data","title":"Getting Started","text":"use brolgar work, convert longitudinal data time series tsibble using tsibble package. , need identify unique identifying key, time index. example: learn longitudinal data time series, see vignette: Longitudinal Data Structures.","code":"wages <- as_tsibble(wages,                     key = id,                     index = xp,                     regular = FALSE)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"basic-summaries-of-the-data","dir":"Articles","previous_headings":"","what":"Basic summaries of the data","title":"Getting Started","text":"first get dataset, need get overall sense data.","code":""},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"how-many-observations-are-there","dir":"Articles","previous_headings":"Basic summaries of the data","what":"How many observations are there?","title":"Getting Started","text":"can kind number keys using n_keys(): Note single number, case, 888 observations. However, might want know many observations individual. want number observations variable, can use n_obs() features(). plot can help provide better understanding distribution observations.","code":"n_keys(wages) #> [1] 888 wages %>%   features(ln_wages, n_obs) #> # A tibble: 888 × 2 #>       id n_obs #>    <int> <int> #>  1    31     8 #>  2    36    10 #>  3    53     8 #>  4   122    10 #>  5   134    12 #>  6   145     9 #>  7   155    11 #>  8   173     6 #>  9   206     3 #> 10   207    11 #> # ℹ 878 more rows library(ggplot2) wages %>%   features(ln_wages, n_obs) %>%   ggplot(aes(x = n_obs)) +    geom_bar()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"add_n_obs","dir":"Articles","previous_headings":"Basic summaries of the data > How many observations are there?","what":"add_n_obs()","title":"Getting Started","text":"can add information number observations key add_n_obs(): can use filter() observations: can also look distance experience, understand distribution experience  can explore range experience see common experience ","code":"wages %>% add_n_obs() #> # A tsibble: 6,402 x 10 [!] #> # Key:       id [888] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(dplyr) wages %>%    add_n_obs() %>%   filter(n_obs > 3) #> # A tsibble: 6,145 x 10 [!] #> # Key:       id [764] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,135 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> wages_xp_range <- wages %>%    features(xp,            feat_ranges)  ggplot(wages_xp_range,        aes(x = range_diff)) +    geom_histogram() wages_xp_range %>%    count(range_diff) %>%    mutate(prop = n / sum(n))  #> # A tibble: 829 × 3 #>    range_diff     n    prop #>         <dbl> <int>   <dbl> #>  1     0         38 0.0428  #>  2     0.0150     1 0.00113 #>  3     0.068      1 0.00113 #>  4     0.137      1 0.00113 #>  5     0.153      1 0.00113 #>  6     0.185      1 0.00113 #>  7     0.22       1 0.00113 #>  8     0.225      1 0.00113 #>  9     0.231      1 0.00113 #> 10     0.26       1 0.00113 #> # ℹ 819 more rows"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"efficiently-exploring-longitudinal-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Efficiently exploring longitudinal data","title":"Getting Started","text":"avoid staring plate spaghetti, can look random subset data. Brolgar provides intuitive functions help .","code":""},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"sample_n_keys","dir":"Articles","previous_headings":"Basic summaries of the data > Efficiently exploring longitudinal data","what":"sample_n_keys()","title":"Getting Started","text":"dplyr, can use sample_n() sample n observations. Similarly, brolgar, can take random sample n keys using sample_n_keys():","code":"set.seed(2019-7-15-1300) wages %>%   sample_n_keys(size = 10) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"filtering-observations","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Filtering observations","title":"Getting Started","text":"can combine sample_n_keys() add_n_obs() filter() show keys many observations:  (Note: sample_frac_keys(), samples fraction available keys.) Now, break many plots?","code":"library(dplyr) wages %>%   add_n_obs() %>%   filter(n_obs > 5) %>%   sample_n_keys(size = 10) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"clever-facets-facet_strata","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Clever facets: facet_strata","title":"Getting Started","text":"brolgar provides clever facets help make easier explore data. facet_strata() splits data 12 groups default:  ask split data groups  want show samples per facet?","code":"set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata() set.seed(2019-07-25-1450) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 20)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"clever-facets-facet_sample","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Clever facets: facet_sample","title":"Getting Started","text":"facet_sample() allows specify number keys per facet, number facets n_per_facet n_facets. splits data 12 facets 3 per facet default:  can specify number:  hood, facet_sample() facet_strata() use sample_n_keys() stratify_keys().","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample() set.seed(2019-07-25-1533) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample(n_per_facet = 3,                n_facets = 20)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"exploratory-modelling","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Exploratory modelling","title":"Getting Started","text":"can fit linear model key using key_slope(). returns intercept slope estimate key, given linear model formula. can get number observations, slope information individual identify decreasing time. can join summaries back data: highlight individuals negative slope using gghighlight:","code":"key_slope(wages,ln_wages ~ xp) #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows library(dplyr) wages_slope <- key_slope(wages,ln_wages ~ xp) %>%   left_join(wages, by = \"id\")   wages_slope #> # A tibble: 6,402 × 11 #>       id .intercept .slope_xp ln_wages    xp   ged xp_since_ged black hispanic #>    <int>      <dbl>     <dbl>    <dbl> <dbl> <int>        <dbl> <int>    <int> #>  1    31       1.41    0.101      1.49 0.015     1        0.015     0        1 #>  2    31       1.41    0.101      1.43 0.715     1        0.715     0        1 #>  3    31       1.41    0.101      1.47 1.73      1        1.73      0        1 #>  4    31       1.41    0.101      1.75 2.77      1        2.77      0        1 #>  5    31       1.41    0.101      1.93 3.93      1        3.93      0        1 #>  6    31       1.41    0.101      1.71 4.95      1        4.95      0        1 #>  7    31       1.41    0.101      2.09 5.96      1        5.96      0        1 #>  8    31       1.41    0.101      2.13 6.98      1        6.98      0        1 #>  9    36       2.04    0.0588     1.98 0.315     1        0.315     0        0 #> 10    36       2.04    0.0588     1.80 0.983     1        0.983     0        0 #> # ℹ 6,392 more rows #> # ℹ 2 more variables: high_grade <int>, unemploy_rate <dbl> library(gghighlight)  wages_slope %>%    as_tibble() %>% # workaround for gghighlight + tsibble   ggplot(aes(x = xp,               y = ln_wages,               group = id)) +    geom_line() +   gghighlight(.slope_xp < 0)"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"find-keys-near-other-summaries-with-keys_near","dir":"Articles","previous_headings":"Basic summaries of the data > Exploratory modelling","what":"Find keys near other summaries with keys_near","title":"Getting Started","text":"take slope information find individuals representative min, median, maximum, etc growth, using keys_near():","code":"wages_slope %>%   keys_near(key = id,             var = .slope_xp,             funs = l_three_num) #> # A tibble: 13 × 5 #>       id .slope_xp stat  stat_value stat_diff #>    <int>     <dbl> <fct>      <dbl>     <dbl> #>  1  6863    0.0452 med       0.0452         0 #>  2  6863    0.0452 med       0.0452         0 #>  3  6863    0.0452 med       0.0452         0 #>  4  6863    0.0452 med       0.0452         0 #>  5  6863    0.0452 med       0.0452         0 #>  6  6863    0.0452 med       0.0452         0 #>  7  6863    0.0452 med       0.0452         0 #>  8  6863    0.0452 med       0.0452         0 #>  9  7918   -4.58   min      -4.58           0 #> 10  7918   -4.58   min      -4.58           0 #> 11  7918   -4.58   min      -4.58           0 #> 12 12455   13.2    max      13.2            0 #> 13 12455   13.2    max      13.2            0 wages_slope %>%   keys_near(key = id,             var = .slope_xp,             funs = l_three_num) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"finding-features-in-longitudinal-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Finding features in longitudinal data","title":"Getting Started","text":"can extract features longitudinal data using features function, fabletools. can, example, calculate minimum given variable key providing named list like : brolgar provides sets features, start feat_. example, five number summary feat_five_num: finding whose values increase decrease feat_monotonic","code":"wages %>%   features(ln_wages,             list(min = min)) #> # A tibble: 888 × 2 #>       id   min #>    <int> <dbl> #>  1    31 1.43  #>  2    36 1.80  #>  3    53 1.54  #>  4   122 0.763 #>  5   134 2.00  #>  6   145 1.48  #>  7   155 1.54  #>  8   173 1.56  #>  9   206 2.03  #> 10   207 1.58  #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_monotonic) #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows"},{"path":"https://brolgar.njtierney.com/articles/getting-started.html","id":"linking-individuals-back-to-the-data","dir":"Articles","previous_headings":"Basic summaries of the data","what":"Linking individuals back to the data","title":"Getting Started","text":"can join features back data left_join, like :","code":"wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/id-interesting-obs.html","id":"specify-your-own-summaries-for-keys_near","dir":"Articles","previous_headings":"","what":"Specify your own summaries for keys_near","title":"Identify Interesting Observations","text":"can specify list summaries pass keys_near. example, create summaries give sense range. Note functions start b_, b_summaries provided brolgar sensible defaults. can read , ?b_summaries","code":"l_ranges <- list(min = b_min,                 range_diff = b_range_diff,                 max = b_max,                 iqr = b_iqr)  wages %>%  key_slope(formula = ln_wages ~ xp) %>%  keys_near(key = id,            var = .slope_xp,            funs = l_ranges) #> # A tibble: 4 × 5 #>      id .slope_xp stat       stat_value stat_diff #>   <int>     <dbl> <fct>           <dbl>     <dbl> #> 1  7918    -4.58  min            -4.58   0        #> 2  9357     0.103 iqr             0.102  0.000139 #> 3 12455    13.2   range_diff     17.8    4.58     #> 4 12455    13.2   max            13.2    0"},{"path":"https://brolgar.njtierney.com/articles/id-interesting-obs.html","id":"implementation-of-keys_near","dir":"Articles","previous_headings":"","what":"Implementation of keys_near","title":"Identify Interesting Observations","text":"interested specifics keys_near() works, section describes implemented brolgar. get data right format, steps. First, need get data format statistics interested , along id, statistic interest. can fit linear model key dataset using key_slope(). can perform summary statistic interest, case slope. need convert long format can calculate difference stat slope, .slope_xp: stats diff, can group stat, find return rows smallest difference statistic value:  can see get output using keys_near():","code":"wages_slope <- key_slope(wages, ln_wages ~ xp)  wages_slope #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows wages_slope_all_stats <- wages_slope %>%   mutate_at(.vars = vars(.slope_xp),             .funs = list(.slope_min = b_min,                          .slope_max = b_max,                          .slope_median = b_median,                          .slope_q1 = b_q25,                          .slope_q3 = b_q75)) %>%   select(id,          starts_with(\".slope\"))  wages_slope_all_stats #> # A tibble: 888 × 7 #>       id .slope_xp .slope_min .slope_max .slope_median .slope_q1 .slope_q3 #>    <int>     <dbl>      <dbl>      <dbl>         <dbl>     <dbl>     <dbl> #>  1    31    0.101       -4.58       13.2        0.0480  -0.00769    0.0947 #>  2    36    0.0588      -4.58       13.2        0.0480  -0.00769    0.0947 #>  3    53   -0.358       -4.58       13.2        0.0480  -0.00769    0.0947 #>  4   122    0.0374      -4.58       13.2        0.0480  -0.00769    0.0947 #>  5   134    0.0831      -4.58       13.2        0.0480  -0.00769    0.0947 #>  6   145    0.0469      -4.58       13.2        0.0480  -0.00769    0.0947 #>  7   155    0.0867      -4.58       13.2        0.0480  -0.00769    0.0947 #>  8   173    0.100       -4.58       13.2        0.0480  -0.00769    0.0947 #>  9   206    0.180       -4.58       13.2        0.0480  -0.00769    0.0947 #> 10   207    0.0884      -4.58       13.2        0.0480  -0.00769    0.0947 #> # ℹ 878 more rows wages_slope_all_stats_long <-  wages_slope_all_stats %>% gather(key = \"stat\",          value = \"stat_value\",          -id,          -.slope_xp)  wages_slope_all_stats_long #> # A tibble: 4,440 × 4 #>       id .slope_xp stat       stat_value #>    <int>     <dbl> <chr>           <dbl> #>  1    31    0.101  .slope_min      -4.58 #>  2    36    0.0588 .slope_min      -4.58 #>  3    53   -0.358  .slope_min      -4.58 #>  4   122    0.0374 .slope_min      -4.58 #>  5   134    0.0831 .slope_min      -4.58 #>  6   145    0.0469 .slope_min      -4.58 #>  7   155    0.0867 .slope_min      -4.58 #>  8   173    0.100  .slope_min      -4.58 #>  9   206    0.180  .slope_min      -4.58 #> 10   207    0.0884 .slope_min      -4.58 #> # ℹ 4,430 more rows stats_diff <-  wages_slope_all_stats_long %>%   mutate(stat_diff = abs(.slope_xp - stat_value))  stats_diff #> # A tibble: 4,440 × 5 #>       id .slope_xp stat       stat_value stat_diff #>    <int>     <dbl> <chr>           <dbl>     <dbl> #>  1    31    0.101  .slope_min      -4.58      4.68 #>  2    36    0.0588 .slope_min      -4.58      4.64 #>  3    53   -0.358  .slope_min      -4.58      4.22 #>  4   122    0.0374 .slope_min      -4.58      4.61 #>  5   134    0.0831 .slope_min      -4.58      4.66 #>  6   145    0.0469 .slope_min      -4.58      4.62 #>  7   155    0.0867 .slope_min      -4.58      4.66 #>  8   173    0.100  .slope_min      -4.58      4.68 #>  9   206    0.180  .slope_min      -4.58      4.76 #> 10   207    0.0884 .slope_min      -4.58      4.67 #> # ℹ 4,430 more rows top_stats_diff <-  stats_diff %>%   group_by(stat) %>%   top_n(-1,         wt = stat_diff)  top_stats_diff #> # A tibble: 6 × 5 #> # Groups:   stat [5] #>      id .slope_xp stat          stat_value stat_diff #>   <int>     <dbl> <chr>              <dbl>     <dbl> #> 1  7918  -4.58    .slope_min      -4.58    0         #> 2 12455  13.2     .slope_max      13.2     0         #> 3  2305   0.0480  .slope_median    0.0480  0.0000498 #> 4 10380   0.0479  .slope_median    0.0480  0.0000498 #> 5  2594  -0.00768 .slope_q1       -0.00769 0.0000127 #> 6 12178   0.0946  .slope_q3        0.0947  0.0000579 top_stats_diff %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line() wages %>%   key_slope(ln_wages ~ xp) %>%   keys_near(key = id,             var = .slope_xp) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id,              colour = stat)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"defining-longitudinal-data-as-a-tsibble","dir":"Articles","previous_headings":"","what":"Defining longitudinal data as a tsibble","title":"Longitudinal Data Structures","text":"tools workflows brolgar designed work special tidy time series data frame called tsibble. can define longitudinal data terms time series gain access really useful tools. , need identify three components: key variable data identifier individual. index variable time component data. regularity time interval (index). Longitudinal data typically irregular time periods measurements, can regular measurements. Together, time index key uniquely identify observation repeated measurements term key used lot brolgar, important idea internalise: key identifier individuals series care defining longitudinal data time series? account time series structure inherent longitudinal data, gain access suite nice tools simplify accelerate work time series data. brolgar built top powerful tsibble package Earo Wang, like learn , see official package documentation read paper.","code":""},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"converting-your-longitudinal-data-to-a-time-series","dir":"Articles","previous_headings":"Defining longitudinal data as a tsibble","what":"Converting your longitudinal data to a time series","title":"Longitudinal Data Structures","text":"convert longitudinal data “time series tibble”, tsibble, need consider variables identify: individual, repeated measurements. key time component, index . regularity time interval (index). Together, time index key uniquely identify observation repeated measurements vignette now walks examples converting longitudinal data tsibble.","code":""},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-data-wages","dir":"Articles","previous_headings":"","what":"example data: wages","title":"Longitudinal Data Structures","text":"Let’s look wages data analysed Singer & Willett (2003). data contains measurements hourly wages years workforce, education race covariates. population measured male high-school dropouts, aged 14 17 years first measured. first 10 rows data. create tsibble data ask, “variables identify…”: key, individual, repeated measurements. index, time component. regularity time interval (index). Together, time index key uniquely identify observation repeated measurements , can say : key variable id - subject id, 1-888. index variable xp experience years individual . data irregular since experience fraction year integer. can use information create tsibble data using as_tsibble Note regular = FALSE, since irregular time series Note following information printed top wages says: 6402 rows, 9 columns. ! top means regular spacing series “key” variable listed - id, 888.","code":"library(brolgar) suppressPackageStartupMessages(library(dplyr)) slice(wages, 1:10) %>% knitr::kable() library(tsibble) as_tsibble(x = wages,            key = id,            index = xp,            regular = FALSE) #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> # A tsibble: 6,402 x 9 [!] # Key:       id [888] ..."},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-heights-data","dir":"Articles","previous_headings":"","what":"example: heights data","title":"Longitudinal Data Structures","text":"heights data little simpler wages data, contains average male heights 144 countries 1810-1989, smaller number countries 1500-1800. contains four variables: country continent year height_cm create tsibble data ask, “variables identify…”: key, individual, repeated measurements. index, time component. regularity time interval (index). case: individual person, country time year year regular measurements fixed year point. data already tsibble object, can create tsibble following code:","code":"as_tsibble(x = heights,            key = country,            index = year,            regular = FALSE) #> # A tsibble: 1,490 x 4 [!] #> # Key:       country [144] #>    country     continent  year height_cm #>    <chr>       <chr>     <dbl>     <dbl> #>  1 Afghanistan Asia       1870      168. #>  2 Afghanistan Asia       1880      166. #>  3 Afghanistan Asia       1930      167. #>  4 Afghanistan Asia       1990      167. #>  5 Afghanistan Asia       2000      161. #>  6 Albania     Europe     1880      170. #>  7 Albania     Europe     1890      170. #>  8 Albania     Europe     1900      169. #>  9 Albania     Europe     2000      168. #> 10 Algeria     Africa     1910      169. #> # ℹ 1,480 more rows"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-gapminder","dir":"Articles","previous_headings":"","what":"example: gapminder","title":"Longitudinal Data Structures","text":"gapminder R package contains dataset subset gapminder study (link). contains data life expectancy, GDP per capita, population country. Let’s identify key, individual, repeated measurements. index, time component. regularity time interval (index). fact similar heights dataset: key country index year identify year regular, can bit data exploration using index_summary() shows us year every five - now know regular longitudinal dataset, can encoded like :","code":"library(gapminder) gapminder #> # A tibble: 1,704 × 6 #>    country     continent  year lifeExp      pop gdpPercap #>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> #>  1 Afghanistan Asia       1952    28.8  8425333      779. #>  2 Afghanistan Asia       1957    30.3  9240934      821. #>  3 Afghanistan Asia       1962    32.0 10267083      853. #>  4 Afghanistan Asia       1967    34.0 11537966      836. #>  5 Afghanistan Asia       1972    36.1 13079460      740. #>  6 Afghanistan Asia       1977    38.4 14880372      786. #>  7 Afghanistan Asia       1982    39.9 12881816      978. #>  8 Afghanistan Asia       1987    40.8 13867957      852. #>  9 Afghanistan Asia       1992    41.7 16317921      649. #> 10 Afghanistan Asia       1997    41.8 22227415      635. #> # ℹ 1,694 more rows gapminder %>%    group_by(country) %>%    index_summary(year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    1952    1966    1980    1980    1993    2007 as_tsibble(gapminder,            key = country,            index = year,            regular = TRUE) #> # A tsibble: 1,704 x 6 [5Y] #> # Key:       country [142] #>    country     continent  year lifeExp      pop gdpPercap #>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> #>  1 Afghanistan Asia       1952    28.8  8425333      779. #>  2 Afghanistan Asia       1957    30.3  9240934      821. #>  3 Afghanistan Asia       1962    32.0 10267083      853. #>  4 Afghanistan Asia       1967    34.0 11537966      836. #>  5 Afghanistan Asia       1972    36.1 13079460      740. #>  6 Afghanistan Asia       1977    38.4 14880372      786. #>  7 Afghanistan Asia       1982    39.9 12881816      978. #>  8 Afghanistan Asia       1987    40.8 13867957      852. #>  9 Afghanistan Asia       1992    41.7 16317921      649. #> 10 Afghanistan Asia       1997    41.8 22227415      635. #> # ℹ 1,694 more rows"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"example-pisa-data","dir":"Articles","previous_headings":"","what":"example: PISA data","title":"Longitudinal Data Structures","text":"PISA study measures school students around world series math, reading, science scores. subset data looks like : Let’s identify key, individual, repeated measurements. index, time component. regularity time interval (index). looks like key student_id, nested within school_id country, index year, write following can assess regularity year like : can now convert tsibble:","code":"pisa #> # A tibble: 433 × 11 #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl> as_tsibble(pisa,             key = c(country),            index = year) index_regular(pisa, year) #> [1] TRUE index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018 pisa_ts <- as_tsibble(pisa,            key = country,            index = year,            regular = TRUE)  pisa_ts #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>"},{"path":"https://brolgar.njtierney.com/articles/longitudinal-data-structures.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Longitudinal Data Structures","text":"idea longitudinal data core brolgar. Understanding longitudinal data , can linked time series representation data helps us understand data structure, gives us access flexible tools. vignettes package show time series tsibble useful.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"exploring-raw-data","dir":"Articles","previous_headings":"","what":"Exploring raw data","title":"Visualisation Gallery","text":"first receive data, want look much raw data possible. section discusses techniques make palatable explore raw data without getting much overplotting.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"select-a-sample-of-individuals","dir":"Articles","previous_headings":"Exploring raw data","what":"Select a sample of individuals","title":"Visualisation Gallery","text":"Sample n random individuals explore (Note: Possibly representative) example, can sample 20 random individuals, plot . (perhaps change sample_n_keys sample_id.)","code":"wages %>%   sample_n_keys(size = 20) #> # A tsibble: 128 x 9 [!] #> # Key:       id [20] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1  2389     2.28 0.154     0        0         0        0         10 #>  2  2389     2.15 1.07      0        0         0        0         10 #>  3  2389     2.30 2.15      0        0         0        0         10 #>  4  2389     1.80 3.06      0        0         0        0         10 #>  5  2389     1.76 3.92      1        0         0        0         10 #>  6  2389     2.00 4.57      1        0.648     0        0         10 #>  7  2389     2.39 5.69      1        1.77      0        0         10 #>  8  2389     2.07 6.57      1        2.65      0        0         10 #>  9  2389     2.20 7.51      1        3.59      0        0         10 #> 10  6269     1.71 2.34      1        1.61      0        0          8 #> # ℹ 118 more rows #> # ℹ 1 more variable: unemploy_rate <dbl>  wages %>%   sample_n_keys(size = 20) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"filter-only-those-with-certain-number-of-observations","dir":"Articles","previous_headings":"Exploring raw data","what":"Filter only those with certain number of observations","title":"Visualisation Gallery","text":"variety number observations data - , many. can filter number observations data using add_n_obs(), adds new column, n_obs, number observations key. can filter data based number observations, combine previous steps sample data using sample_n_keys().","code":"wages %>%   add_n_obs() #> # A tsibble: 6,402 x 10 [!] #> # Key:       id [888] #>       id    xp n_obs ln_wages   ged xp_since_ged black hispanic high_grade #>    <int> <dbl> <int>    <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31 0.015     8     1.49     1        0.015     0        1          8 #>  2    31 0.715     8     1.43     1        0.715     0        1          8 #>  3    31 1.73      8     1.47     1        1.73      0        1          8 #>  4    31 2.77      8     1.75     1        2.77      0        1          8 #>  5    31 3.93      8     1.93     1        3.93      0        1          8 #>  6    31 4.95      8     1.71     1        4.95      0        1          8 #>  7    31 5.96      8     2.09     1        5.96      0        1          8 #>  8    31 6.98      8     2.13     1        6.98      0        1          8 #>  9    36 0.315    10     1.98     1        0.315     0        0          9 #> 10    36 0.983    10     1.80     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(dplyr) wages %>%   add_n_obs() %>%   filter(n_obs >= 5) %>%   sample_n_keys(size = 20) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-facet_strata","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets: facet_strata","title":"Visualisation Gallery","text":"brolgar provides clever facets help make easier explore data. facet_strata() splits data 12 groups default:  can control number n_strata:  regular control facet options:","code":"set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata() set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 6) set.seed(2019-07-23-1936) library(ggplot2) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_strata(n_strata = 6,                nrow = 3,                ncol = 2)"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-facet_sample","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets: facet_sample","title":"Visualisation Gallery","text":"facet_sample() allows specify number samples per plot , “n per plot” number facets show “n facets”. default splits data 12 facets 3 per group:  allows look larger sample data.","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"clever-facets-with-number-of-observations","dir":"Articles","previous_headings":"Exploring raw data","what":"Clever facets with number of observations","title":"Visualisation Gallery","text":"can combine add_n_obs() filter() show series 5 observations:  approaches allow view large sections raw data, point individuals “interesting”, sense outliers, representative middle group.","code":"set.seed(2019-07-23-1937) wages %>%   add_n_obs() %>%   filter(n_obs >= 5) %>% ggplot(aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"exploring-data-using-features","dir":"Articles","previous_headings":"","what":"Exploring data using features","title":"Visualisation Gallery","text":"can plot features data first identifying features interest joining back data. details explanation , see vignette, “Finding Features”.","code":""},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"plot-monotonic-individual-series","dir":"Articles","previous_headings":"Exploring data using features","what":"Plot monotonic individual series","title":"Visualisation Gallery","text":"example, plot whose values increase decrease feat_monotonic gghighlight:  can explore available features, see function References","code":"library(gghighlight) wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase)"},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"plot-individuals-with-negative-slope","dir":"Articles","previous_headings":"Exploring data using features","what":"Plot individuals with negative slope","title":"Visualisation Gallery","text":"can find individuals negative slope using key_slope. detail key_slope, see Exploratory Modelling vignette. key_slope fits linear model key, returns tibble key columns .intercept .slope_<varname>, explanatory variables. can use gghighlight identify individuals overall negative slope:  positive slope  even facet slope:","code":"wages %>% key_slope(ln_wages ~ xp) #> # A tibble: 888 × 3 #>       id .intercept .slope_xp #>    <int>      <dbl>     <dbl> #>  1    31       1.41    0.101  #>  2    36       2.04    0.0588 #>  3    53       2.29   -0.358  #>  4   122       1.93    0.0374 #>  5   134       2.03    0.0831 #>  6   145       1.59    0.0469 #>  7   155       1.66    0.0867 #>  8   173       1.61    0.100  #>  9   206       1.73    0.180  #> 10   207       1.62    0.0884 #> # ℹ 878 more rows library(dplyr) wages_slope <- wages %>%   key_slope(ln_wages ~ xp) %>%   left_join(wages, by = \"id\")  gg_wages_slope <- ggplot(wages_slope,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line()   gg_wages_slope +    gghighlight(.slope_xp < 0) gg_wages_slope +    gghighlight(.slope_xp > 0) gg_wages_slope +    facet_wrap(~.slope_xp > 0)"},{"path":[]},{"path":"https://brolgar.njtierney.com/articles/visualisation-gallery.html","id":"visualise-along-slope","dir":"Articles","previous_headings":"Move along features with facet_strata","what":"Visualise along slope","title":"Visualisation Gallery","text":"can use along argument facet_strata() break data according feature. catch data passed must tsibble. example, break data along .slope_xp variable 12 groups, default arranged descending order. groups broken positive slope negative.  along features five number summary:  move along minimum:  move along maximum:  move along median:  hood needs summarisation data arrange like , details implementation helpfile ?facet_strata.","code":"wages_slope <- wages %>%   key_slope(ln_wages ~ xp) %>%   # ensures that we keep the data as a `tsibble`   left_join(x = wages, y = ., by = \"id\")  gg_wages_slope <- ggplot(wages_slope,        aes(x = xp,            y = ln_wages,            group = id)) +    geom_line()   gg_wages_slope +   facet_strata(n_strata = 12,                along = .slope_xp) wages_five <- wages %>%     features(ln_wages, feat_five_num) %>%   # ensures that we keep the data as a `tsibble`   left_join(x = wages, y = ., by = \"id\")  wages_five #> # A tsibble: 6,402 x 14 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 6 more variables: unemploy_rate <dbl>, min <dbl>, q25 <dbl>, med <dbl>, #> #   q75 <dbl>, max <dbl> gg_wages_five <- ggplot(wages_five,                          aes(x = xp,                              y = ln_wages,                              group = id)) +                       geom_line()   gg_wages_five gg_wages_five +   facet_strata(n_strata = 12,                along = min) gg_wages_five +   facet_strata(n_strata = 12,                along = max) gg_wages_five +   facet_strata(n_strata = 12,                along = med)"},{"path":"https://brolgar.njtierney.com/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Nicholas Tierney. Author, maintainer. Di Cook. Author. Tania Prvan. Author. Stuart Lee. Contributor. Earo Wang. Contributor.","code":""},{"path":"https://brolgar.njtierney.com/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Tierney N, Cook D, Prvan T (2022). “R Journal: brolgar: R package BRowse Longitudinal Data Graphically Analytically R.” R Journal, 14, 6-25. ISSN 2073-4859, doi:10.32614/RJ-2022-023, https://doi.org/10.32614/RJ-2022-023.","code":"@Article{,   title = {The R Journal: brolgar: An R package to BRowse Over Longitudinal Data Graphically and Analytically in R},   author = {Nicholas Tierney and Di Cook and Tania Prvan},   journal = {The R Journal},   year = {2022},   volume = {14},   issue = {2},   pages = {6-25},   note = {https://doi.org/10.32614/RJ-2022-023},   issn = {2073-4859},   doi = {10.32614/RJ-2022-023}, }"},{"path":"https://brolgar.njtierney.com/index.html","id":"brolgar","dir":"","previous_headings":"","what":"Browse Over Longitudinal Data Graphically and Analytically in R","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"brolgar helps browse longitudinal data graphically analytically R, providing tools : Efficiently explore raw longitudinal data Calculate features (summaries) individuals Evaluate diagnostics statistical models helps go “plate spaghetti” plot left, “interesting observations” plot right.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Install GitHub : R Universe :","code":"# install.packages(\"remotes\") remotes::install_github(\"njtierney/brolgar\") # Enable this universe options(repos = c(     njtierney = 'https://njtierney.r-universe.dev',     CRAN = 'https://cloud.r-project.org')     )  # Install some packages install.packages('brolgar')"},{"path":"https://brolgar.njtierney.com/index.html","id":"using-brolgar-we-need-to-talk-about-data","dir":"","previous_headings":"","what":"Using brolgar: We need to talk about data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"many ways describe longitudinal data - panel data, cross-sectional data, time series. define longitudinal data : individuals repeatedly measured time. tools workflows brolgar designed work special tidy time series data frame called tsibble. can define longitudinal data terms time series gain access really useful tools. , need identify three components: key variable data identifier individual. index variable time component data. regularity time interval (index). Longitudinal data typically irregular time periods measurements, can regular measurements. Together, time index key uniquely identify observation. term key used lot brolgar, important idea internalise: key identifier individuals series Identifying key, index, regularity data can challenge. can learn specifying vignette, “Longitudinal Data Structures”.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"the-wages-data","dir":"","previous_headings":"","what":"The wages data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"wages data example dataset provided brolgar. looks like : hood, created following setup: as_tsibble() takes wages, key, index, state regular = FALSE (since regular time periods measurements). turns data tsibble object - powerful data abstraction made available tsibble package Earo Wang, like learn tsibble, see official package documentation read paper.","code":"wages #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> wages <- as_tsibble(x = wages,                     key = id,                     index = xp,                     regular = FALSE)"},{"path":"https://brolgar.njtierney.com/index.html","id":"efficiently-exploring-longitudinal-data","dir":"","previous_headings":"","what":"Efficiently exploring longitudinal data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Exploring longitudinal data can challenging many individuals. difficult look ! often get “plate spaghetti” plot, many lines plotted top . can avoid spaghetti looking random subset data using tools brolgar.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"sample_n_keys","dir":"","previous_headings":"","what":"sample_n_keys()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"dplyr, can use sample_n() sample n observations, sample_frac() look fraction observations. brolgar builds providing sample_n_keys() sample_frac_keys(). allows take random sample n keys using sample_n_keys(). example:  want create many plots?","code":"set.seed(2019-7-15-1300) wages %>%   sample_n_keys(size = 5) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/index.html","id":"clever-facets-facet_sample","dir":"","previous_headings":"","what":"Clever facets: facet_sample()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"facet_sample() allows specify number keys per facet, number facets n_per_facet n_facets. default, splits data 12 facets 5 per facet:  hood, facet_sample() powered sample_n_keys() stratify_keys(). can see facets (e.g., facet_strata()) data visualisations can make brolgar Visualisation Gallery.","code":"set.seed(2019-07-23-1937) ggplot(wages,        aes(x = xp,            y = ln_wages,            group = id)) +   geom_line() +   facet_sample()"},{"path":"https://brolgar.njtierney.com/index.html","id":"finding-features-in-longitudinal-data","dir":"","previous_headings":"","what":"Finding features in longitudinal data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Sometimes want know range summary variable individual. call summaries features data, can extracted using features function, fabletools. example, want answer question “summary wages individual?”. can use features() find five number summary (min, max, q1, q3, median) ln_wages feat_five_num: returns id, features. many features brolgar - features begin feat_. can, example, find whose ln_wages values increase decrease feat_monotonic: can read creating using features Finding Features vignette. can also see features time series feasts package.","code":"wages %>%   features(ln_wages,            feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows wages %>%   features(ln_wages, feat_monotonic) #> # A tibble: 888 × 5 #>       id increase decrease unvary monotonic #>    <int> <lgl>    <lgl>    <lgl>  <lgl>     #>  1    31 FALSE    FALSE    FALSE  FALSE     #>  2    36 FALSE    FALSE    FALSE  FALSE     #>  3    53 FALSE    FALSE    FALSE  FALSE     #>  4   122 FALSE    FALSE    FALSE  FALSE     #>  5   134 FALSE    FALSE    FALSE  FALSE     #>  6   145 FALSE    FALSE    FALSE  FALSE     #>  7   155 FALSE    FALSE    FALSE  FALSE     #>  8   173 FALSE    FALSE    FALSE  FALSE     #>  9   206 TRUE     FALSE    FALSE  TRUE      #> 10   207 FALSE    FALSE    FALSE  FALSE     #> # ℹ 878 more rows"},{"path":"https://brolgar.njtierney.com/index.html","id":"linking-individuals-back-to-the-data","dir":"","previous_headings":"","what":"Linking individuals back to the data","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"created features, can join back data left_join, like :","code":"wages %>%   features(ln_wages, feat_monotonic) %>%   left_join(wages, by = \"id\") %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +   geom_line() +    gghighlight(increase) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: id #> Too many data series, skip labeling"},{"path":[]},{"path":"https://brolgar.njtierney.com/index.html","id":"n_obs","dir":"","previous_headings":"","what":"n_obs()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Return number observations total n_obs():","code":"n_obs(wages) #> n_obs  #>  6402"},{"path":"https://brolgar.njtierney.com/index.html","id":"n_keys","dir":"","previous_headings":"","what":"n_keys()","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"number keys data using n_keys():","code":"n_keys(wages) #> [1] 888"},{"path":"https://brolgar.njtierney.com/index.html","id":"finding-the-number-of-observations-per-key","dir":"","previous_headings":"","what":"Finding the number of observations per key.","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"can also use n_obs() inside features return number observations key: returns dataframe, one row per key, number observations key. summarised get sense patterns number observations:","code":"wages %>%   features(ln_wages, n_obs) #> # A tibble: 888 × 2 #>       id n_obs #>    <int> <int> #>  1    31     8 #>  2    36    10 #>  3    53     8 #>  4   122    10 #>  5   134    12 #>  6   145     9 #>  7   155    11 #>  8   173     6 #>  9   206     3 #> 10   207    11 #> # ℹ 878 more rows library(ggplot2) wages %>%   features(ln_wages, n_obs) %>%   ggplot(aes(x = n_obs)) +      geom_bar() wages %>%   features(ln_wages, n_obs) %>%   summary() #>        id            n_obs        #>  Min.   :   31   Min.   : 1.000   #>  1st Qu.: 3332   1st Qu.: 5.000   #>  Median : 6666   Median : 8.000   #>  Mean   : 6343   Mean   : 7.209   #>  3rd Qu.: 9194   3rd Qu.: 9.000   #>  Max.   :12543   Max.   :13.000"},{"path":"https://brolgar.njtierney.com/index.html","id":"further-reading","dir":"","previous_headings":"","what":"Further Reading","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"brolgar provides useful functions explore data, can read exploratory modelling Identify Interesting Observations vignettes. taster, figures can produce:","code":"#> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: id #> Too many data series, skip labeling #> Warning in left_join(., wages, by = \"id\"): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1077 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #>   \"many-to-many\"` to silence this warning."},{"path":"https://brolgar.njtierney.com/index.html","id":"related-work","dir":"","previous_headings":"","what":"Related work","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"One sources inspiration work lasangar R package Bryan Swihart (paper). even expansive time series summarisation, make sure check feasts package (talk!).","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Please note brolgar project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"a-note-on-the-api","dir":"","previous_headings":"","what":"A Note on the API","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"version brolgar forked tprvan/brolgar, undergone breaking changes API.","code":""},{"path":"https://brolgar.njtierney.com/index.html","id":"acknowledgements","dir":"","previous_headings":"","what":"Acknowledgements","title":"Browse Over Longitudinal Data Graphically and Analytically in R","text":"Thank Mitchell O’Hara-Wild Earo Wang many useful discussions implementation brolgar, heavily inspired feasts package tidyverts. also like thank Tania Prvan valuable early contributions project, well Stuart Lee helpful discussions. Thanks also Ursula Laa feedback package structure documentation.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":null,"dir":"Reference","previous_headings":"","what":"Add the number of observations for each key in a tsibble — add_n_obs","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":", counting number rows dataset, rather counting number observations keys data.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"","code":"add_n_obs(.data, ...)"},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":".data tsibble ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"tsibble n_obs, number observations per key added.","code":""},{"path":"https://brolgar.njtierney.com/reference/add_n_obs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add the number of observations for each key in a tsibble — add_n_obs","text":"","code":"library(dplyr) #>  #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #>  #>     filter, lag #> The following objects are masked from ‘package:base’: #>  #>     intersect, setdiff, setequal, union # you can explore the data to see those cases that have exactly two   # observations: heights %>%    add_n_obs() %>%    filter(n_obs == 2) #> # A tsibble: 16 x 5 [!] #> # Key:       country [8] #>    country              year n_obs continent height_cm #>    <chr>               <dbl> <int> <chr>         <dbl> #>  1 Botswana             1910     2 Africa         165. #>  2 Botswana             1980     2 Africa         167. #>  3 Burundi              1920     2 Africa         166. #>  4 Burundi              1930     2 Africa         169. #>  5 Costa Rica           1940     2 Americas       166. #>  6 Costa Rica           1980     2 Americas       174. #>  7 El Salvador          1990     2 Americas       169. #>  8 El Salvador          2000     2 Americas       171. #>  9 Libya                1890     2 Africa         166. #> 10 Libya                1920     2 Africa         165. #> 11 Mongolia             1910     2 Asia           163. #> 12 Mongolia             1930     2 Asia           165. #> 13 Singapore            1970     2 Asia           172. #> 14 Singapore            2000     2 Asia           175. #> 15 Trinidad and Tobago  1980     2 Americas       174. #> 16 Trinidad and Tobago  2000     2 Americas       174."},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":null,"dir":"Reference","previous_headings":"","what":"Brolgar summaries (b_summaries) — b_min","title":"Brolgar summaries (b_summaries) — b_min","text":"Customised summaries vectors appropriate defaults longitudinal data. functions prefixed b_ assist autocomplete. uses na.rm = TRUE , calculations involving quantiles, type = 8 names = FALSE. Summaries include: * b_min: minimum * b_max: maximum * b_median: median * b_mean: mean * b_q25: 25th quantile * b_q75: 75th quantile * b_range: range * b_range_diff: difference range (max - min) * b_sd: standard deviation * b_var: variance * b_mad: mean absolute deviation * b_iqr: Inter-quartile range * b_diff_var: variance diff() * b_diff_sd: standard deviation diff() * b_diff_mean: mean diff() * b_diff_median: median diff() * b_diff_q25: q25 diff() * b_diff_q75: q75 diff()","code":""},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Brolgar summaries (b_summaries) — b_min","text":"","code":"b_min(x, ...)  b_max(x, ...)  b_median(x, ...)  b_mean(x, ...)  b_q25(x, ...)  b_q75(x, ...)  b_range(x, ...)  b_range_diff(x, ...)  b_sd(x, ...)  b_var(x, ...)  b_mad(x, ...)  b_iqr(x, ...)  b_diff_var(x, ...)  b_diff_sd(x, ...)  b_diff_mean(x, ...)  b_diff_median(x, ...)  b_diff_q25(x, ...)  b_diff_q75(x, ...)  b_diff_max(x, ...)  b_diff_min(x, ...)  b_diff_iqr(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Brolgar summaries (b_summaries) — b_min","text":"x vector ... arguments pass","code":""},{"path":"https://brolgar.njtierney.com/reference/b_summaries.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Brolgar summaries (b_summaries) — b_min","text":"","code":"x <- c(1:5, NA, 5:1) min(x) #> [1] NA b_min(x) #> [1] 1 max(x) #> [1] NA b_max(x) #> [1] 5 median(x) #> [1] NA b_median(x) #> [1] 3 mean(x) #> [1] NA b_mean(x) #> [1] 3 range(x) #> [1] NA NA b_range(x) #> [1] 1 5 var(x) #> [1] NA b_var(x) #> [1] 2.222222 sd(x) #> [1] NA b_sd(x) #> [1] 1.490712"},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"can calculate series summary statistics (features) given variable dataset. example, three number summary, minimum, median, maximum, can calculated given variable. designed work features() function shown examples. available features brolgar include: feat_three_num() - minimum, median, maximum feat_five_num() - minimum, q25, median, q75, maximum. feat_ranges() - min, max, range difference, interquartile range. feat_spread()  - variance, standard deviation, median absolute distance, interquartile range feat_monotonic() - always increasing, decreasing, unvarying? feat_diff_summary() - summary statistics differences amongst value, including five number summary, well standard deviation variance. Returns NA one observation, take difference one observation, difference 0 cases misleading. feat_brolgar()  features brolgar.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"","code":"feat_three_num(x, ...)  feat_five_num(x, ...)  feat_ranges(x, ...)  feat_spread(x, ...)  feat_monotonic(x, ...)  feat_brolgar(x, ...)  feat_diff_summary(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"x vector extract features . ... arguments passed functions.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-features.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate features of a tsibble object in conjunction with features() — brolgar-features","text":"","code":"# You can use any of the features `feat_*` in conjunction with `features`  # like so: heights %>%   features(height_cm, # variable you want to explore            feat_three_num) # the feature summarisation you want to perform #> # A tibble: 144 × 4 #>    country       min   med   max #>    <chr>       <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  167.  168. #>  2 Albania      168.  170.  170. #>  3 Algeria      166.  169   171. #>  4 Angola       159.  167.  169. #>  5 Argentina    167.  168.  174. #>  6 Armenia      164.  169.  172. #>  7 Australia    170   172.  178. #>  8 Austria      162.  167.  179. #>  9 Azerbaijan   170.  172.  172. #> 10 Bahrain      161.  164.  164  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":null,"dir":"Reference","previous_headings":"","what":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"Provides framework tools summarise, visualise, explore longitudinal data. builds upon tidy time series data frames used 'tsibble' package, designed integrate within 'tidyverse', 'tidyverts' (time series) ecosystems. methods implemented include calculating features understanding longitudinal data, including calculating summary statistics quantiles, medians, numeric ranges, sampling individual series, identifying individual series representative group, extending facet system 'ggplot2' facilitate exploration samples data. methods fully described paper \"brolgar: R package Browse Longitudinal Data Graphically Analytically R\", Nicholas Tierney, Dianne Cook, Tania Prvan (2020) arXiv:2012.01619.","code":""},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"brolgar stands : BRowse Longitudinal data Graphically Analytically R.","code":""},{"path":[]},{"path":"https://brolgar.njtierney.com/reference/brolgar-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"brolgar: Browse Over Longitudinal Data Graphically and Analytically in R — brolgar-package","text":"Maintainer: Nicholas Tierney nicholas.tierney@gmail.com (ORCID) Authors: Di Cook dicook@monash.edu (ORCID) Tania Prvan tania.prvan@mq.edu.au contributors: Stuart Lee [contributor] Earo Wang [contributor]","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":null,"dir":"Reference","previous_headings":"","what":"Facet data into groups to facilitate exploration — facet_sample","title":"Facet data into groups to facilitate exploration — facet_sample","text":"function requires tbl_ts object, can created tsibble::as_tsibble(). hood, facet_strata powered stratify_keys() sample_n_keys().","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facet data into groups to facilitate exploration — facet_sample","text":"","code":"facet_sample(   n_per_facet = 3,   n_facets = 12,   nrow = NULL,   ncol = NULL,   scales = \"fixed\",   shrink = TRUE,   strip.position = \"top\" )"},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facet data into groups to facilitate exploration — facet_sample","text":"n_per_facet Number keys per facet want plot. Default 3. n_facets Number facets create. Default 12 nrow, ncol Number rows columns. scales scales fixed (\"fixed\", default), free (\"free\"), free one dimension (\"free_x\", \"free_y\")? shrink TRUE, shrink scales fit output statistics, raw data. FALSE, range raw data statistical summary. strip.position default, labels displayed top plot. Using strip.position possible place labels either four sides setting strip.position = c(\"top\",   \"bottom\", \"left\", \"right\")","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facet data into groups to facilitate exploration — facet_sample","text":"ggplot object","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_sample.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facet data into groups to facilitate exploration — facet_sample","text":"","code":"library(ggplot2) ggplot(heights, aes(x = year,     y = height_cm,     group = country)) +   geom_line() +   facet_sample()   ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_sample(n_per_facet = 1,                n_facets = 12)"},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":null,"dir":"Reference","previous_headings":"","what":"Facet data into groups to facilitate exploration — facet_strata","title":"Facet data into groups to facilitate exploration — facet_strata","text":"function requires tbl_ts object, can created tsibble::as_tsibble(). hood, facet_strata powered stratify_keys().","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facet data into groups to facilitate exploration — facet_strata","text":"","code":"facet_strata(   n_strata = 12,   along = NULL,   fun = mean,   nrow = NULL,   ncol = NULL,   scales = \"fixed\",   shrink = TRUE,   strip.position = \"top\" )"},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facet data into groups to facilitate exploration — facet_strata","text":"n_strata number groups create along variable stratify along. groups key takes summary statistic (default, mean). arranges mean value key assigns n_strata groups. fun summary function. Default mean. nrow, ncol Number rows columns. scales scales fixed (\"fixed\", default), free (\"free\"), free one dimension (\"free_x\", \"free_y\")? shrink TRUE, shrink scales fit output statistics, raw data. FALSE, range raw data statistical summary. strip.position default, labels displayed top plot. Using strip.position possible place labels either four sides setting strip.position = c(\"top\",   \"bottom\", \"left\", \"right\")","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facet data into groups to facilitate exploration — facet_strata","text":"ggplot object","code":""},{"path":"https://brolgar.njtierney.com/reference/facet_strata.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facet data into groups to facilitate exploration — facet_strata","text":"","code":"library(ggplot2) ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata()      ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_wrap(~continent)   ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata(along = year)   # \\donttest{ library(dplyr) heights %>%   key_slope(height_cm ~ year) %>%   right_join(heights, ., by = \"country\") %>%   ggplot(aes(x = year,              y = height_cm)) +   geom_line(aes(group = country)) +   geom_smooth(method = \"lm\") +    facet_strata(along = .slope_year) #> `geom_smooth()` using formula = 'y ~ x'  # }"},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":null,"dir":"Reference","previous_headings":"","what":"World Height Data — heights","title":"World Height Data — heights","text":"Average male heights 144 countries 1810-1989, smaller number countries 1500-1800. Data filtered include countries one observation.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"World Height Data — heights","text":"","code":"heights"},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"World Height Data — heights","text":"object class tbl_ts (inherits tbl_df, tbl, data.frame) 1490 rows 4 columns.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"World Height Data — heights","text":"heights stored time series tsibble object. contains variables: country: Country. forms identifying key. year: Year. forms time index. height_cm: Average male height centimeters. continent: continent extracted country name using countrycode package (https://joss.theoj.org/papers/10.21105/joss.00848). information, see article: \"tall others short? Agricultural production proximate determinants global heights\",  Joerg Baten Matthias Blum, European Review Economic History 18 (2014), 144–165. Data available https://datasets.iisg.amsterdam/dataset.xhtml?persistentId=hdl:10622/IAEKLA, accessed via Clio Infra website.","code":""},{"path":"https://brolgar.njtierney.com/reference/heights.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"World Height Data — heights","text":"","code":"# show the data heights #> # A tsibble: 1,490 x 4 [!] #> # Key:       country [144] #>    country     continent  year height_cm #>    <chr>       <chr>     <dbl>     <dbl> #>  1 Afghanistan Asia       1870      168. #>  2 Afghanistan Asia       1880      166. #>  3 Afghanistan Asia       1930      167. #>  4 Afghanistan Asia       1990      167. #>  5 Afghanistan Asia       2000      161. #>  6 Albania     Europe     1880      170. #>  7 Albania     Europe     1890      170. #>  8 Albania     Europe     1900      169. #>  9 Albania     Europe     2000      168. #> 10 Algeria     Africa     1910      169. #> # ℹ 1,480 more rows  # show the spaghetti plot (ugh!) library(ggplot2) ggplot(heights,         aes(x = year,             y = height_cm,             group = country)) +      geom_line()       # Explore all samples with `facet_strata()` ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_strata()   # Explore the heights over each continent ggplot(heights,        aes(x = year,            y = height_cm,            group = country)) +   geom_line() +   facet_wrap(~continent)     # explore the five number summary of height_cm with `features` heights %>%    features(height_cm, feat_five_num) #> # A tibble: 144 × 6 #>    country       min   q25   med   q75   max #>    <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> #>  1 Afghanistan  161.  164.  167.  168.  168. #>  2 Albania      168.  168.  170.  170.  170. #>  3 Algeria      166.  168.  169   170.  171. #>  4 Angola       159.  160.  167.  168.  169. #>  5 Argentina    167.  168.  168.  170.  174. #>  6 Armenia      164.  166.  169.  172.  172. #>  7 Australia    170   171.  172.  173.  178. #>  8 Austria      162.  164.  167.  169.  179. #>  9 Azerbaijan   170.  171.  172.  172.  172. #> 10 Bahrain      161.  161.  164.  164.  164  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":null,"dir":"Reference","previous_headings":"","what":"Index summaries — index_summary","title":"Index summaries — index_summary","text":"functions check index regular (index_regular()), summarise index variable (index_summary()). can useful check index variables.","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Index summaries — index_summary","text":"","code":"index_regular(.data, ...)  # S3 method for tbl_ts index_regular(.data, ...)  # S3 method for data.frame index_regular(.data, index, ...)  index_summary(.data, ...)  # S3 method for tbl_ts index_summary(.data, ...)  # S3 method for data.frame index_summary(.data, index, ...)"},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Index summaries — index_summary","text":".data data.frame tsibble ... extra arguments index proposed index variable","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Index summaries — index_summary","text":"logical  TRUE means regular, FALSE means ","code":""},{"path":"https://brolgar.njtierney.com/reference/index_summary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Index summaries — index_summary","text":"","code":"# a tsibble index_regular(heights) #> [1] FALSE  # some data frames index_regular(pisa, year) #> [1] TRUE index_regular(airquality, Month) #> [1] TRUE  # a tsibble index_summary(heights) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    1550    1730    1820    1818    1910    2000  # some data frames index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018  index_summary(airquality, Month) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>       5       6       7       7       8       9  index_summary(airquality, Day) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>     1.0     8.5    16.0    16.0    23.5    31.0"},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit linear model for each key — key_slope","title":"Fit linear model for each key — key_slope","text":"Using key_slope can fit linear model key tsibble. add_key_slope adds slope information back data, returns full dimension tsibble.","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit linear model for each key — key_slope","text":"","code":"key_slope(.data, formula, ...)  add_key_slope(.data, formula)  add_key_slope.default(.data, formula)"},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit linear model for each key — key_slope","text":".data tsibble formula formula ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit linear model for each key — key_slope","text":"tibble coefficient information","code":""},{"path":"https://brolgar.njtierney.com/reference/key_slope.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit linear model for each key — key_slope","text":"","code":"key_slope(heights, height_cm ~ year) #> # A tibble: 144 × 3 #>    country     .intercept .slope_year #>    <chr>            <dbl>       <dbl> #>  1 Afghanistan      217.      -0.0263 #>  2 Albania          202.      -0.0170 #>  3 Algeria          111.       0.0297 #>  4 Angola            43.9      0.0648 #>  5 Argentina        147.       0.0117 #>  6 Armenia           87.9      0.0419 #>  7 Australia         46.1      0.0665 #>  8 Austria           38.2      0.0695 #>  9 Azerbaijan       150.       0.0111 #> 10 Bahrain         -157.       0.165  #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"","code":"# S3 method for data.frame keys_near(.data, key, var, top_n = 1, funs = l_five_num, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":".data data.frame key key, identifies unique observations. var variable summarise top_n top number closest observations return - default 1, also return ties. funs named list functions summarise . Default given list five number summary, l_five_num. ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.data.frame.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near.data.frame","text":"","code":"heights %>%   key_slope(height_cm ~ year) %>%   keys_near(key = country,             var = .slope_year) #> # A tibble: 6 × 5 #>   country    .slope_year stat  stat_value stat_diff #>   <chr>            <dbl> <fct>      <dbl>     <dbl> #> 1 Austria         0.0695 q_75      0.0690  0.000515 #> 2 Burundi         0.321  max       0.321   0        #> 3 Eritrea        -0.102  min      -0.102   0        #> 4 Mali            0.0401 med       0.0403  0.000120 #> 5 Spain           0.0404 med       0.0403  0.000120 #> 6 Tajikistan      0.0199 q_25      0.0205  0.000632 # Specify your own list of summaries l_ranges <- list(min = b_min,                  range_diff = b_range_diff,                  max = b_max,                  iqr = b_iqr)  heights %>%   key_slope(formula = height_cm ~ year) %>%   keys_near(key = country,               var = .slope_year,               funs = l_ranges) #> # A tibble: 4 × 5 #>   country     .slope_year stat       stat_value stat_diff #>   <chr>             <dbl> <fct>           <dbl>     <dbl> #> 1 Burundi          0.321  range_diff     0.424    0.102   #> 2 Burundi          0.321  max            0.321    0       #> 3 Eritrea         -0.102  min           -0.102    0       #> 4 Switzerland      0.0496 iqr            0.0485   0.00116"},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"","code":"keys_near(.data, ...)  # S3 method for default keys_near(.data, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near","text":".data tsibble ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"data.frame containing keys closest given statistic.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near","text":"","code":"keys_near(heights, height_cm) #> # A tibble: 18 × 5 #>    country          height_cm stat  stat_value stat_diff #>    <chr>                <dbl> <fct>      <dbl>     <dbl> #>  1 Denmark               183. max         183.   0       #>  2 Ethiopia              167. med         167.   0.00900 #>  3 Ghana                 164. q_25        164.   0       #>  4 Hungary               164. q_25        164.   0       #>  5 Italy                 164. q_25        164.   0       #>  6 Italy                 164. q_25        164.   0       #>  7 Liberia               167. med         167.   0.00900 #>  8 Morocco               170. q_75        170.   0.00392 #>  9 Mozambique            164. q_25        164.   0       #> 10 Mozambique            164. q_25        164.   0       #> 11 Pakistan              164. q_25        164.   0       #> 12 Papua New Guinea      152. min         152.   0       #> 13 Romania               164. q_25        164.   0       #> 14 Romania               164. q_25        164.   0       #> 15 Sierra Leone          164. q_25        164.   0       #> 16 Slovakia              164. q_25        164.   0       #> 17 Ukraine               164. q_25        164.   0       #> 18 Ukraine               164. q_25        164.   0"},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":null,"dir":"Reference","previous_headings":"","what":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"Return keys nearest given statistics summary.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"","code":"# S3 method for tbl_ts keys_near(.data, var, top_n = 1, funs = l_five_num, stat_as_factor = TRUE, ...)"},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":".data tsibble var variable summarise top_n top number closest observations return - default 1, also return ties. funs named list functions summarise . Default given list five number summary, l_five_num. stat_as_factor coerce stat variable factor? Default TRUE. ... extra arguments pass mutate_at performing summary given funs.","code":""},{"path":"https://brolgar.njtierney.com/reference/keys_near.tbl_ts.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return keys nearest to a given statistics or summary. — keys_near.tbl_ts","text":"","code":"# Return observations closest to the five number summary of height_cm heights %>%   keys_near(var = height_cm) #> # A tibble: 18 × 5 #>    country          height_cm stat  stat_value stat_diff #>    <chr>                <dbl> <fct>      <dbl>     <dbl> #>  1 Denmark               183. max         183.   0       #>  2 Ethiopia              167. med         167.   0.00900 #>  3 Ghana                 164. q_25        164.   0       #>  4 Hungary               164. q_25        164.   0       #>  5 Italy                 164. q_25        164.   0       #>  6 Italy                 164. q_25        164.   0       #>  7 Liberia               167. med         167.   0.00900 #>  8 Morocco               170. q_75        170.   0.00392 #>  9 Mozambique            164. q_25        164.   0       #> 10 Mozambique            164. q_25        164.   0       #> 11 Pakistan              164. q_25        164.   0       #> 12 Papua New Guinea      152. min         152.   0       #> 13 Romania               164. q_25        164.   0       #> 14 Romania               164. q_25        164.   0       #> 15 Sierra Leone          164. q_25        164.   0       #> 16 Slovakia              164. q_25        164.   0       #> 17 Ukraine               164. q_25        164.   0       #> 18 Ukraine               164. q_25        164.   0"},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":null,"dir":"Reference","previous_headings":"","what":"A named list of the five number summary — l_funs","title":"A named list of the five number summary — l_funs","text":"Designed use keys_near() function.","code":""},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A named list of the five number summary — l_funs","text":"","code":"l_five_num  l_three_num"},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"A named list of the five number summary — l_funs","text":"object class list length 5. object class list length 3.","code":""},{"path":"https://brolgar.njtierney.com/reference/l_funs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A named list of the five number summary — l_funs","text":"","code":"# Specify your own list of summaries l_ranges <- list(min = b_min,                  range_diff = b_range_diff,                  max = b_max,                  iqr = b_iqr)  heights %>%   key_slope(formula = height_cm ~ year) %>%   keys_near(key = country,               var = .slope_year,               funs = l_ranges) #> # A tibble: 4 × 5 #>   country     .slope_year stat       stat_value stat_diff #>   <chr>             <dbl> <fct>           <dbl>     <dbl> #> 1 Burundi          0.321  range_diff     0.424    0.102   #> 2 Burundi          0.321  max            0.321    0       #> 3 Eritrea         -0.102  min           -0.102    0       #> 4 Switzerland      0.0496 iqr            0.0485   0.00116"},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":null,"dir":"Reference","previous_headings":"","what":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"provides three families functions tell values always increasing, decreasing, unvarying, functions, increasing(), decreasing(), unvarying(). hood uses diff find differences, like can pass extra arguments diff.","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"","code":"increasing(x, ...)  decreasing(x, ...)  unvarying(x, ...)  monotonic(x, ...)"},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"x numeric integer ... extra arguments pass diff","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"logical TRUE FALSE","code":""},{"path":"https://brolgar.njtierney.com/reference/monotonic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Are values monotonic? Always increasing, decreasing, or unvarying? — monotonic","text":"","code":"vec_inc <- c(1:10) vec_dec<- c(10:1) vec_ran <- c(sample(1:10)) vec_flat <- rep.int(1,10)  increasing(vec_inc) #> [1] TRUE increasing(vec_dec) #> [1] FALSE increasing(vec_ran) #> [1] FALSE increasing(vec_flat) #> [1] FALSE  decreasing(vec_inc) #> [1] FALSE decreasing(vec_dec) #> [1] TRUE decreasing(vec_ran) #> [1] FALSE decreasing(vec_flat) #> [1] FALSE  unvarying(vec_inc) #> [1] FALSE unvarying(vec_dec) #> [1] FALSE unvarying(vec_ran) #> [1] FALSE unvarying(vec_flat) #> [1] TRUE  library(ggplot2) library(gghighlight) library(dplyr)  heights_mono <- heights %>%   features(height_cm, feat_monotonic) %>%   left_join(heights, by = \"country\")      ggplot(heights_mono,          aes(x = year,              y = height_cm,              group = country)) +   geom_line() +    gghighlight(increase) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: country #> Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps    ggplot(heights_mono,         aes(x = year,             y = height_cm,              group = country)) +   geom_line() +    gghighlight(decrease) #> Warning: Tried to calculate with group_by(), but the calculation failed. #> Falling back to ungrouped filter operation... #> label_key: country   heights_mono %>% filter(monotonic) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()     heights_mono %>%   filter(increase) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":null,"dir":"Reference","previous_headings":"","what":"Return the number of observations — n_obs","title":"Return the number of observations — n_obs","text":"Returns number observations vector data.frame. uses vctrs::vec_size() hood.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return the number of observations — n_obs","text":"","code":"n_obs(x, names = TRUE)"},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return the number of observations — n_obs","text":"x vector data.frame names logical; TRUE result named vector named \"n_obs\", else just number observations.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return the number of observations — n_obs","text":"number observations","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"note","dir":"Reference","previous_headings":"","what":"Note","title":"Return the number of observations — n_obs","text":"use n_obs features counting key variable like - features(heights, country, n_obs). Instead, use variable.","code":""},{"path":"https://brolgar.njtierney.com/reference/n_obs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return the number of observations — n_obs","text":"","code":"n_obs(iris) #> n_obs  #>   150  n_obs(1:10) #> n_obs  #>    10  add_n_obs(heights) #> # A tsibble: 1,490 x 5 [!] #> # Key:       country [144] #>    country      year n_obs continent height_cm #>    <chr>       <dbl> <int> <chr>         <dbl> #>  1 Afghanistan  1870     5 Asia           168. #>  2 Afghanistan  1880     5 Asia           166. #>  3 Afghanistan  1930     5 Asia           167. #>  4 Afghanistan  1990     5 Asia           167. #>  5 Afghanistan  2000     5 Asia           161. #>  6 Albania      1880     4 Europe         170. #>  7 Albania      1890     4 Europe         170. #>  8 Albania      1900     4 Europe         169. #>  9 Albania      2000     4 Europe         168. #> 10 Algeria      1910     5 Africa         169. #> # ℹ 1,480 more rows heights %>%   features(height_cm, n_obs) # can be any variable except id, the key. #> # A tibble: 144 × 2 #>    country     n_obs #>    <chr>       <int> #>  1 Afghanistan     5 #>  2 Albania         4 #>  3 Algeria         5 #>  4 Angola          9 #>  5 Argentina      20 #>  6 Armenia        11 #>  7 Australia      10 #>  8 Austria        18 #>  9 Azerbaijan      7 #> 10 Bahrain         3 #> # ℹ 134 more rows"},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":null,"dir":"Reference","previous_headings":"","what":"Return x percent to y percent of values — near_between","title":"Return x percent to y percent of values — near_between","text":"Return x percent y percent values","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return x percent to y percent of values — near_between","text":"","code":"near_between(x, from, to)"},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return x percent to y percent of values — near_between","text":"x numeric vector lower bound percentage upper bound percentage","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return x percent to y percent of values — near_between","text":"logical vector","code":""},{"path":"https://brolgar.njtierney.com/reference/near_between.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return x percent to y percent of values — near_between","text":"","code":"x <- runif(20)  near_middle(x = x,             middle = 0.5,             within = 0.2) #>  [1]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  library(dplyr) heights %>% features(height_cm, list(min = min)) %>%   filter(near_between(min, 0.1, 0.9)) #> # A tibble: 114 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Afghanistan  161. #>  2 Albania      168. #>  3 Algeria      166. #>  4 Argentina    167. #>  5 Armenia      164. #>  6 Austria      162. #>  7 Bahrain      161. #>  8 Bangladesh   160. #>  9 Belarus      164. #> 10 Belgium      163. #> # ℹ 104 more rows  near_quantile(x = x,               probs = 0.5,                tol = 0.01) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  near_quantile(x, c(0.25, 0.5, 0.75), 0.05) #>  [1]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  heights %>%   features(height_cm, l_five_num) %>%   mutate_at(vars(min:max),             .funs = near_quantile,             0.5,              0.01) %>%   filter(min) #> # A tibble: 0 × 6 #> # ℹ 6 variables: country <chr>, min <lgl>, q_25 <lgl>, med <lgl>, q_75 <lgl>, #> #   max <lgl>  heights %>%   features(height_cm, list(min = min)) %>%   mutate(min_near_q3 = near_quantile(min, c(0.25, 0.5, 0.75), 0.01)) %>%   filter(min_near_q3) #> # A tibble: 2 × 3 #>   country      min min_near_q3 #>   <chr>      <dbl> <lgl>       #> 1 Ethiopia    161. TRUE        #> 2 Madagascar  161. TRUE         heights %>%   features(height_cm, list(min = min)) %>%   filter(near_between(min, 0.1, 0.9)) #> # A tibble: 114 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Afghanistan  161. #>  2 Albania      168. #>  3 Algeria      166. #>  4 Argentina    167. #>  5 Armenia      164. #>  6 Austria      162. #>  7 Bahrain      161. #>  8 Bangladesh   160. #>  9 Belarus      164. #> 10 Belgium      163. #> # ℹ 104 more rows  heights %>%   features(height_cm, list(min = min)) %>%   filter(near_middle(min, 0.5, 0.1)) #> # A tibble: 14 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Brazil       164. #>  2 Cameroon     164. #>  3 Estonia      165. #>  4 Gabon        164. #>  5 Ghana        164. #>  6 Guinea       164. #>  7 Kenya        165. #>  8 Kyrgyzstan   164. #>  9 Latvia       165. #> 10 Lithuania    165. #> 11 Netherlands  164. #> 12 Switzerland  165. #> 13 Tajikistan   165. #> 14 Uganda       165."},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":null,"dir":"Reference","previous_headings":"","what":"Return the middle x percent of values — near_middle","title":"Return the middle x percent of values — near_middle","text":"Return middle x percent values","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return the middle x percent of values — near_middle","text":"","code":"near_middle(x, middle, within)"},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Return the middle x percent of values — near_middle","text":"x numeric vector middle percentage want center around within percentage around center","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Return the middle x percent of values — near_middle","text":"logical vector","code":""},{"path":"https://brolgar.njtierney.com/reference/near_middle.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Return the middle x percent of values — near_middle","text":"","code":"x <- runif(20) near_middle(x = x,             middle = 0.5,             within = 0.2) #>  [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE #> [13] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE              library(dplyr) heights %>% features(height_cm, list(min = min)) %>%   filter(near_middle(min, 0.5, 0.1)) #> # A tibble: 14 × 2 #>    country       min #>    <chr>       <dbl> #>  1 Brazil       164. #>  2 Cameroon     164. #>  3 Estonia      165. #>  4 Gabon        164. #>  5 Ghana        164. #>  6 Guinea       164. #>  7 Kenya        165. #>  8 Kyrgyzstan   164. #>  9 Latvia       165. #> 10 Lithuania    165. #> 11 Netherlands  164. #> 12 Switzerland  165. #> 13 Tajikistan   165. #> 14 Uganda       165."},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":null,"dir":"Reference","previous_headings":"","what":"Which values are nearest to any given quantiles — near_quantile","title":"Which values are nearest to any given quantiles — near_quantile","text":"values nearest given quantiles","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Which values are nearest to any given quantiles — near_quantile","text":"","code":"near_quantile(x, probs, tol = 0.01)"},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Which values are nearest to any given quantiles — near_quantile","text":"x vector probs quantiles calculate tol tolerance terms x accept near quantile. Default 0.01.","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Which values are nearest to any given quantiles — near_quantile","text":"logical vector TRUE/FALSE number close quantile","code":""},{"path":"https://brolgar.njtierney.com/reference/near_quantile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Which values are nearest to any given quantiles — near_quantile","text":"","code":"x <- runif(20) near_quantile(x, 0.5, 0.05) #>  [1] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE #> [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE near_quantile(x, c(0.25, 0.5, 0.75), 0.05) #>  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE #> [13] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  library(dplyr) heights %>%    features(height_cm, list(min = min)) %>%    mutate(min_near_median = near_quantile(min, 0.5, 0.01)) %>%   filter(min_near_median) #> # A tibble: 0 × 3 #> # ℹ 3 variables: country <chr>, min <dbl>, min_near_median <lgl> heights %>%    features(height_cm, list(min = min)) %>%    mutate(min_near_q3 = near_quantile(min, c(0.25, 0.5, 0.75), 0.01)) %>%   filter(min_near_q3) #> # A tibble: 2 × 3 #>   country      min min_near_q3 #>   <chr>      <dbl> <lgl>       #> 1 Ethiopia    161. TRUE        #> 2 Madagascar  161. TRUE"},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":null,"dir":"Reference","previous_headings":"","what":"Is x nearest to y? — nearests","title":"Is x nearest to y? — nearests","text":"Returns TRUE x nearest y. two implementations. nearest_lgl() returns logical vector element first argument nearest element second argument. nearest_qt_lgl() similar nearest_lgl(), instead determines element first argument nearest value given quantile probabilities. See example detail.","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Is x nearest to y? — nearests","text":"","code":"nearest_lgl(x, y)  nearest_qt_lgl(y, ...)"},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Is x nearest to y? — nearests","text":"x numeric vector y numeric vector ... (used) arguments pass quantile().","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Is x nearest to y? — nearests","text":"logical vector length(y)","code":""},{"path":"https://brolgar.njtierney.com/reference/nearests.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Is x nearest to y? — nearests","text":"","code":"x <- 1:10 y <- 5:14 z <- 16:25 a <- -1:-5 b <- -1  nearest_lgl(x, y) #>  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE nearest_lgl(y, x) #>  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  nearest_lgl(x, z) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE nearest_lgl(z, x) #>  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  nearest_lgl(x, a) #> [1]  TRUE FALSE FALSE FALSE FALSE nearest_lgl(a, x) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  nearest_lgl(x, b) #> [1] TRUE nearest_lgl(b, x) #>  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  library(dplyr) heights_near_min <- heights %>%   filter(nearest_lgl(min(height_cm), height_cm))    heights_near_fivenum <- heights %>%   filter(nearest_lgl(fivenum(height_cm), height_cm))    heights_near_qt_1 <- heights %>%   filter(nearest_qt_lgl(height_cm, c(0.5)))    heights_near_qt_3 <- heights %>%   filter(nearest_qt_lgl(height_cm, c(0.1, 0.5, 0.9)))"},{"path":"https://brolgar.njtierney.com/reference/pipe.html","id":null,"dir":"Reference","previous_headings":"","what":"Pipe operator — %>%","title":"Pipe operator — %>%","text":"See magrittr::%>% details.","code":""},{"path":"https://brolgar.njtierney.com/reference/pipe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pipe operator — %>%","text":"","code":"lhs %>% rhs"},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":null,"dir":"Reference","previous_headings":"","what":"Student data from 2000-2018 PISA OECD data — pisa","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"subset PISA data, containing scores information triennial testing 15 year olds around globe. Original data available https://www.oecd.org/pisa/data/. Data derived https://github.com/kevinwang09/learningtower.","code":""},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"","code":"pisa"},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"tibble following variables year year measurement country three letter country code. data contains Australia, New Zealand, Indonesia. full data learningtower contains 99 countries. school_id unique school identification number student_id student identification number gender recorded gender - 1 female 2 male missing math Simulated score mathematics read Simulated score reading science Simulated score science stu_wgt final survey weight score student score Understanding bit PISA data, school_id student_id unique across time. means longitudinal element country within given year. can cast pisa tsibble, need aggregate data year country. , important provide summary statistics scores - want include mean, minimum maximum math, reading, science scores, lose information individuals. example code , first grouping year country, calculating weighted mean math, reading, science. can done using student weight variable stu_wgt, get survey weighted mean. minimum maximum calculated.","code":""},{"path":"https://brolgar.njtierney.com/reference/pisa.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Student data from 2000-2018 PISA OECD data — pisa","text":"","code":"pisa #> # A tibble: 433 × 11 #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>  library(dplyr) # Let's identify  #1.  The **key**, the individual, who would have repeated measurements.  #2.  The **index**, the time component. #3.  The **regularity** of the time interval (index).   # Here it looks like the key is the student_id, which is nested within # school_id #' and country,  # And the index is year, so we would write the following  as_tsibble(pisa,             key = country,            index = year) #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl>  # We can assess the regularity of the year like so:  index_regular(pisa, year) #> [1] TRUE index_summary(pisa, year) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    2000    2004    2009    2009    2014    2018   # We can now convert this into a `tsibble`:  pisa_ts <- as_tsibble(pisa,            key = country,            index = year,            regular = TRUE)  pisa_ts #> # A tsibble: 433 x 11 [3Y] #> # Key:       country [100] #>    country  year math_mean math_min math_max read_mean read_min read_max #>    <fct>   <int>     <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl> #>  1 ALB      2000      395.     27.4     722.      354.  59.7        640. #>  2 ALB      2009      377.     79.6     706.      385.  17.0        662. #>  3 ALB      2012      395.     62.4     688.      394.   0.0834     742. #>  4 ALB      2015      412.    122.      711.      405.  93.6        825. #>  5 ALB      2018      437.     96.5     789.      405. 152.         693. #>  6 ARE      2009      421.     57.8     768.      431.  48.1        772. #>  7 ARE      2012      434.    138.      862.      442.  75.5        785. #>  8 ARE      2015      427.     91.8     793.      432.  54.4        827. #>  9 ARE      2018      437.     87.6     865.      431.  84.0        814. #> 10 ARG      2000      385.     16.0     675.      417.  84.2        761. #> # ℹ 423 more rows #> # ℹ 3 more variables: science_mean <dbl>, science_min <dbl>, science_max <dbl> pisa_ts_au_nz <- pisa_ts %>% filter(country %in% c(\"AUS\", \"NZL\", \"QAT\"))  library(ggplot2) ggplot(pisa_ts_au_nz,         aes(x = year,             y = math_mean,            group = country,            colour = country)) +   geom_ribbon(aes(ymin = math_min,                    ymax = math_max),                fill = \"grey70\") +   geom_line(size = 1) +   lims(y = c(0, 1000)) +   labs(y = \"math\") + facet_wrap(~country) #> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. #> ℹ Please use `linewidth` instead."},{"path":"https://brolgar.njtierney.com/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. fabletools features, features_all, features_at, features_if tsibble as_tsibble, n_keys","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample a number or fraction of keys to explore — sample-n-frac-keys","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"Sample number fraction keys explore","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"","code":"sample_n_keys(.data, size)  sample_frac_keys(.data, size)"},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":".data tsibble object size number fraction observations, depending function used. sample_n_keys, number > 0, sample_frac_keys fraction, 0 1.","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"tsibble fewer observations key","code":""},{"path":"https://brolgar.njtierney.com/reference/sample-n-frac-keys.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample a number or fraction of keys to explore — sample-n-frac-keys","text":"","code":"library(ggplot2) sample_n_keys(heights,              size = 10) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line()  library(ggplot2) sample_frac_keys(wages,                 0.1) %>%   ggplot(aes(x = xp,              y = unemploy_rate,              group = id)) +    geom_line()"},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":null,"dir":"Reference","previous_headings":"","what":"Stratify the keys into groups to facilitate exploration — stratify_keys","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"look much raw data possible, can helpful stratify data groups plotting. can stratify keys using stratify_keys() function, adds column, .strata. allows user create facetted plots showing raw data.","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"","code":"stratify_keys(.data, n_strata, along = NULL, fun = mean, ...)"},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":".data data.frame explore n_strata number groups create along variable stratify along. groups key takes summary statistic (default, mean). arranges mean value key assigns n_strata groups. fun summary function. Default mean. ... extra arguments","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"data.frame column, .strata containing n_strata groups","code":""},{"path":"https://brolgar.njtierney.com/reference/stratify_keys.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Stratify the keys into groups to facilitate exploration — stratify_keys","text":"","code":"library(ggplot2) library(brolgar)  heights %>%   sample_frac_keys(size = 0.1) %>%   stratify_keys(10) %>%  ggplot(aes(x = height_cm,             y = year,             group = country)) +   geom_line() +   facet_wrap(~.strata)     # now facet along some feature library(dplyr)  heights %>% key_slope(height_cm ~ year) %>%   right_join(heights, ., by = \"country\") %>%   stratify_keys(n_strata = 12,                 along = .slope_year,                 fun = median) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line() +    facet_wrap(~.strata)    heights %>%   stratify_keys(n_strata = 12,                 along = height_cm) %>%   ggplot(aes(x = year,              y = height_cm,              group = country)) +    geom_line() +    facet_wrap(~.strata)"},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":null,"dir":"Reference","previous_headings":"","what":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"data contains measurements hourly wages years workforce, education race covariates. population measured male high-school dropouts, aged 14 17 years first measured. wages time series tsibble. comes J. D. Singer J. B. Willett. Applied Longitudinal Data Analysis. Oxford University Press, Oxford, UK, 2003. https://stats.idre.ucla.edu/stat/r/examples/alda/data/wages_pp.txt","code":""},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"","code":"wages"},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"tsibble data frame 6402 rows 8 variables: id 1–888, subject. forms key data ln_wages natural log wages, adjusted inflation, 1990 dollars. xp Experience - length time workforce (years). treated time variable, t0 subject starting first day work. number time points values time points subject can differ. forms index data ged /graduate equivalency diploma obtained. xp_since_ged change experience since getting ged (get one) black categorical indicator race = black. hispanic categorical indicator race = hispanic. high_grade highest grade completed unemploy_rate unemployment rates local geographic region measurement time","code":""},{"path":"https://brolgar.njtierney.com/reference/wages.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Wages data from National Longitudinal Survey of Youth (NLSY) — wages","text":"","code":"# show the data wages #> # A tsibble: 6,402 x 9 [!] #> # Key:       id [888] #>       id ln_wages    xp   ged xp_since_ged black hispanic high_grade #>    <int>    <dbl> <dbl> <int>        <dbl> <int>    <int>      <int> #>  1    31     1.49 0.015     1        0.015     0        1          8 #>  2    31     1.43 0.715     1        0.715     0        1          8 #>  3    31     1.47 1.73      1        1.73      0        1          8 #>  4    31     1.75 2.77      1        2.77      0        1          8 #>  5    31     1.93 3.93      1        3.93      0        1          8 #>  6    31     1.71 4.95      1        4.95      0        1          8 #>  7    31     2.09 5.96      1        5.96      0        1          8 #>  8    31     2.13 6.98      1        6.98      0        1          8 #>  9    36     1.98 0.315     1        0.315     0        0          9 #> 10    36     1.80 0.983     1        0.983     0        0          9 #> # ℹ 6,392 more rows #> # ℹ 1 more variable: unemploy_rate <dbl> library(ggplot2) # set seed so that the plots stay the same set.seed(2019-7-15-1300) # explore a sample of five individuals wages %>%   sample_n_keys(size = 5) %>%   ggplot(aes(x = xp,              y = ln_wages,              group = id)) +    geom_line()   # Explore many samples with `facet_sample()`   ggplot(wages,           aes(x = xp,              y = ln_wages,              group = id)) +    geom_line() +    facet_sample()   # explore the five number summary of ln_wages with `features` wages %>%    features(ln_wages, feat_five_num) #> # A tibble: 888 × 6 #>       id   min   q25   med   q75   max #>    <int> <dbl> <dbl> <dbl> <dbl> <dbl> #>  1    31 1.43   1.48  1.73  2.02  2.13 #>  2    36 1.80   1.97  2.32  2.59  2.93 #>  3    53 1.54   1.58  1.71  1.89  3.24 #>  4   122 0.763  2.10  2.19  2.46  2.92 #>  5   134 2.00   2.28  2.36  2.79  2.93 #>  6   145 1.48   1.58  1.77  1.89  2.04 #>  7   155 1.54   1.83  2.22  2.44  2.64 #>  8   173 1.56   1.68  2.00  2.05  2.34 #>  9   206 2.03   2.07  2.30  2.45  2.48 #> 10   207 1.58   1.87  2.15  2.26  2.66 #> # ℹ 878 more rows"}]