From 69f7dc5d41d14b7dca3ce8da021e11d9eeb8b4b7 Mon Sep 17 00:00:00 2001 From: XiangyunHuang Date: Tue, 21 Sep 2021 09:38:28 +0800 Subject: [PATCH] update --- .github/workflows/Render-Book.yaml | 2 +- DESCRIPTION | 4 +- _bookdown.yml | 83 +++-- _common.R | 26 +- advanced-documents.Rmd | 14 + book.bib | 28 +- dashboard/007-shiny-app/README.md | 24 +- data-modeling.Rmd | 5 + data-product.Rmd | 5 + data-visualization.Rmd | 53 ++- data-wrangling.Rmd | 5 + document-elements.Rmd | 96 +++++ dynamic-documents.Rmd | 398 +------------------- generalized-additive-models.Rmd | 13 +- generalized-linear-models.Rmd | 8 +- graphics-foundations.Rmd | 132 +++---- images/cover.png | Bin 0 -> 31621 bytes images/cover.svg | 1 + index.Rmd | 101 ++--- interactive-web-graphics.Rmd | 568 +++++++++-------------------- linear-models.Rmd | 29 +- machine-learning.Rmd | 4 +- natural-language-processing.Rmd | 2 + neural-networks.Rmd | 84 +++++ numerical-optimization.Rmd | 35 ++ office-documents.Rmd | 19 + portable-documents.Rmd | 167 +++++++++ preamble.tex | 12 +- preface.Rmd | 6 +- regular-expressions.Rmd | 5 + renv.lock | 301 ++++++++------- reproducible-workflows.Rmd | 5 + spatial-analysis.Rmd | 216 ++++++----- spatial-modeling.Rmd | 15 +- spatial-viz.Rmd | 321 ++++++++++++++++ spatio-temporal-data.Rmd | 6 + statistical-foundations.Rmd | 5 + statistical-graphics.Rmd | 5 + statistical-models.Rmd | 6 + string-operations.Rmd | 15 + style.css | 4 + symbolic-computation.Rmd | 2 +- text-analysis.Rmd | 3 + web-documents.Rmd | 93 +++++ 44 files changed, 1631 insertions(+), 1295 deletions(-) create mode 100644 advanced-documents.Rmd create mode 100644 data-modeling.Rmd create mode 100644 data-product.Rmd create mode 100644 data-wrangling.Rmd create mode 100644 document-elements.Rmd create mode 100644 images/cover.png create mode 100644 images/cover.svg create mode 100644 office-documents.Rmd create mode 100644 portable-documents.Rmd create mode 100644 reproducible-workflows.Rmd create mode 100644 spatial-viz.Rmd create mode 100644 spatio-temporal-data.Rmd create mode 100644 statistical-foundations.Rmd create mode 100644 statistical-graphics.Rmd create mode 100644 statistical-models.Rmd create mode 100644 web-documents.Rmd diff --git a/.github/workflows/Render-Book.yaml b/.github/workflows/Render-Book.yaml index 649670441..f9ecd000e 100644 --- a/.github/workflows/Render-Book.yaml +++ b/.github/workflows/Render-Book.yaml @@ -30,7 +30,7 @@ jobs: strategy: matrix: config: - - {os: ubuntu-20.04, r: '4.1.0'} + - {os: ubuntu-20.04, r: '4.1.1'} # - {os: macos-11, r: '4.1.0'} env: diff --git a/DESCRIPTION b/DESCRIPTION index 7d7d9cdd2..e10e2114e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,6 +10,7 @@ Imports: arules (>= 1.6-8), autoplotly (>= 0.1.4), bayesplot (>= 1.8.1), + bbmle (>= 1.0.24), beanplot (>= 1.2), BiocManager (>= 1.30.16), bookdown (>= 0.22), @@ -53,11 +54,11 @@ Imports: heatmaply (>= 1.2.1), highcharter (>= 0.8.2), hrbrthemes (>= 0.8.0), - INLA (>= 21.02.23), knitr (>= 1.33), kableExtra (>= 1.3.4), Kendall (>= 2.2), leaflet (>= 2.0.4.1), + leafletCN (>= 0.2.1), lightgbm (>= 3.2.0), lme4 (>= 1.1-27.1), magick (>= 2.7.2), @@ -104,6 +105,7 @@ Imports: sp (>= 1.4-2), sparkline (>= 2.0), spDataLarge (>= 0.5.4), + splines2 (>= 0.4.3), sparklyr (>= 1.7.1), StanHeaders (>= 2.26.2), SuppDists (>= 1.1-9.5), diff --git a/_bookdown.yml b/_bookdown.yml index 37ce3fbf5..155d36c5c 100644 --- a/_bookdown.yml +++ b/_bookdown.yml @@ -15,36 +15,47 @@ before_chapter_script: ["_common.R"] rmd_files: - "index.Rmd" -- "preface.Rmd" # 前言 -- "notations.Rmd" # 符号表 +- "preface.Rmd" # 前言 -# R 语言 - -- "file-operations.Rmd" # 文件操作 -- "data-structure.Rmd" # R 语言数据结构 -- "data-manipulation.Rmd" # 数据操作 +# 数据处理 +- "data-wrangling.Rmd" +- "data-structure.Rmd" # R 语言数据结构 - "data-transportation.Rmd" # 数据导入导出 -- "graphics-foundations.Rmd" # 图形基础 -- "data-visualization.Rmd" # 数据可视化 -- "dynamic-documents.Rmd" # 动态文档 - +- "string-operations.Rmd" # 字符串操作 +- "regular-expressions.Rmd" # 正则表达式 +- "data-manipulation.Rmd" # 数据操作 + +# 统计图形 +- "statistical-graphics.Rmd" +- "graphics-foundations.Rmd" # 图形基础 +- "data-visualization.Rmd" # 数据可视化 - "interactive-web-graphics.Rmd" # 交互式网页图形 + +# 文档 +- "dynamic-documents.Rmd" # 动态文档 +- "document-elements.Rmd" # 文档元素 +- "portable-documents.Rmd" # 便携式文档 +- "web-documents.Rmd" # 网页文档 +- "office-documents.Rmd" # 办公文档 +- "reproducible-workflows.Rmd" # 可重复性工作流 +- "advanced-documents.Rmd" # 大型应用 + +# 数据产品 +- "data-product.Rmd" - "interactive-data-tables.Rmd" # 交互式表格 - "interactive-shiny-app.Rmd" # 交互式仪表盘 -- "string-operations.Rmd" # 字符串操作 -- "regular-expressions.Rmd" # 正则表达式 -- "text-analysis.Rmd" # 文本分析 - -# 现代统计 - +# 统计基础 +- "statistical-foundations.Rmd" - "sampling-distributions.Rmd" # 抽样分布 - "parameter-estimators.Rmd" # 参数估计 - "hypothesis-test.Rmd" # 假设检验 - "power-analysis.Rmd" # 功效分析 - "experimental-design.Rmd" # 试验设计 +# 统计模型 +- "statistical-models.Rmd" - "linear-models.Rmd" # 线性模型 - "generalized-linear-models.Rmd" # 广义线性模型 #- "generalized-additive-models.Rmd" # 广义可加模型 @@ -54,18 +65,27 @@ rmd_files: #- "generalized-linear-mixed-effects-models.Rmd" # 广义线性混合效应模型 #- "generalized-additive-mixed-models.Rmd" # 广义可加混合效应模型 #- "nonlinear-mixed-models.Rmd" # 非线性混合效应模型 +- "bayesian-models.Rmd" # 贝叶斯模型 -- "case-study.Rmd" # 案例学习 -- "data-explorer.Rmd" # 数据探索 +# 数据建模 +- "data-modeling.Rmd" +- "text-analysis.Rmd" # 文本分析 - "survival-analysis.Rmd" # 生存分析 - "time-series-analysis.Rmd" # 时序分析 + +# 时空统计 +- "spatio-temporal-data.Rmd" - "spatial-analysis.Rmd" # 空间分析 - "spatial-modeling.Rmd" # 空间建模 +- "spatial-viz.Rmd" + +# 统计应用 +- "case-study.Rmd" # 案例学习 +- "data-explorer.Rmd" # 数据探索 -- "bayesian-models.Rmd" # 贝叶斯模型 # 机器学习 -#- "machine-learning.Rmd" +- "machine-learning.Rmd" #- "k-means-clustering.Rmd" #- "k-nearest-neighbors.Rmd" @@ -76,24 +96,27 @@ rmd_files: #- "decision-trees.Rmd" #- "random-forests.Rmd" - "gradient-boosting-machine.Rmd" -- "neural-networks.Rmd" +# - "neural-networks.Rmd" -- "matrix-operations.Rmd" # 矩阵运算 -- "symbolic-computation.Rmd" -- "numerical-optimization.Rmd" # 数值优化 +# 运筹优化 + +- "numerical-optimization.Rmd" # 数值优化 #- "expectation-maximization.Rmd" #- "markov-chain-monte-carlo.Rmd" #- "deep-learning.Rmd" -#- "natural-language-processing.Rmd" # 文本挖掘 分词 主题模型 LDA +#- "natural-language-processing.Rmd" # 文本挖掘 分词 主题模型 LDA -- "appendix.Rmd" # 附录和命令行操作 -- "other-softwares.Rmd" # 其它软件工具 +- "appendix.Rmd" # 附录和命令行操作 +- "matrix-operations.Rmd" # 矩阵运算 +- "symbolic-computation.Rmd" -- "mixed-programming.Rmd" # R 和其它语言的混合编程 +- "mixed-programming.Rmd" # 混合编程 - "object-oriented-programming.Rmd" # 面向对象编程 - +- "file-operations.Rmd" # 文件操作 +- "other-softwares.Rmd" # 其它软件工具 #- "random-vector.Rmd" #- "stochastic-process.Rmd" +- "notations.Rmd" # 符号表 - "references.Rmd" diff --git a/_common.R b/_common.R index 631adaf20..a78b6cf7f 100644 --- a/_common.R +++ b/_common.R @@ -1,16 +1,18 @@ -knitr::opts_chunk$set(width = 69, dpi = 300, message = FALSE, fig.align='center') +knitr::opts_chunk$set(width = 69, dpi = 300, message = FALSE, fig.align = "center") # convert pdf to png to_png <- function(fig_path) { png_path <- sub("\\.pdf$", ".png", fig_path) - magick::image_write(magick::image_read_pdf(fig_path), format = "png", path = png_path, - density = 300, quality = 100) + magick::image_write(magick::image_read_pdf(fig_path), + format = "png", path = png_path, + density = 300, quality = 100 + ) return(png_path) } # embed math fonts to pdf embed_math_fonts <- function(fig_path) { - if(knitr::is_latex_output()){ + if (knitr::is_latex_output()) { embedFonts( file = fig_path, outfile = fig_path, fontpaths = system.file("fonts", package = "fontcm") @@ -21,15 +23,15 @@ embed_math_fonts <- function(fig_path) { knitr::knit_hooks$set(output = local({ # the default output hook - hook_output = knitr::knit_hooks$get('output') + hook_output <- knitr::knit_hooks$get("output") function(x, options) { if (!is.null(n <- options$out.lines)) { # out.lines - x = xfun::split_lines(x) + x <- xfun::split_lines(x) if (length(x) > n) { # truncate the output - x = c(head(x, n), '....\n') + x <- c(head(x, n), "....\n") } - x = paste(x, collapse = '\n') # paste first n lines together + x <- paste(x, collapse = "\n") # paste first n lines together } hook_output(x, options) } @@ -41,15 +43,15 @@ palette(c( "#4285f4", # GoogleBlue "#34A853", # GoogleGreen "#FBBC05", # GoogleYellow - "#EA4335" # GoogleRed + "#EA4335" # GoogleRed )) is_on_travis <- identical(Sys.getenv("TRAVIS"), "true") is_online <- curl::has_internet() is_latex <- identical(knitr::opts_knit$get("rmarkdown.pandoc.to"), "latex") is_html <- identical(knitr::opts_knit$get("rmarkdown.pandoc.to"), "html") -is_windows <- identical(.Platform$OS.type, 'windows') -is_unix <- identical(.Platform$OS.type, 'unix') +is_windows <- identical(.Platform$OS.type, "windows") +is_unix <- identical(.Platform$OS.type, "unix") # 创建临时的目录存放数据集 -if(!dir.exists(paths = "./data")) dir.create(path = "./data") +if (!dir.exists(paths = "./data")) dir.create(path = "./data") diff --git a/advanced-documents.Rmd b/advanced-documents.Rmd new file mode 100644 index 000000000..79b3f5309 --- /dev/null +++ b/advanced-documents.Rmd @@ -0,0 +1,14 @@ +# 高级文档 {#chap-advanced-documents} + +## 编写书籍 {#sec-bookdown} + +此外, [ElegantTufteBookdown](https://github.com/XiangyunHuang/ElegantTufteBookdown) 项目提供了 tufte 风格的书籍模板,本书配套的仓库目录 `examples/` 下准备了一系列常用模板。 + +## 个人网站 {#sec-blogdown} + +## R 包文档 {#sec-pkgdown} + +## 课程网站 {#sec-rmarkdown} + + + diff --git a/book.bib b/book.bib index afd27f853..18af9449a 100644 --- a/book.bib +++ b/book.bib @@ -137,7 +137,22 @@ @Inbook{Deb2005 doi = "10.1007/0-387-28356-0_10", url = "https://doi.org/10.1007/0-387-28356-0_10" } - +@Article{Ryacas, + title = {{Ryacas: A computer algebra system in R}}, + author = {Mikkel Meyer Andersen and Søren Højsgaard}, + journal = {Journal of Open Source Software}, + year = {2019}, + volume = {4}, + number = {42}, + url = {https://doi.org/10.21105/joss.01763}, +} +@Article{mgcViz, + author = {Matteo Fasiolo and Raphael Nedellec and Yannig Goude and Simon N. Wood}, + title = {Scalable visualisation methods for modern Generalized Additive Models.}, + journal = {Arxiv preprint}, + year = {2018}, + url = {https://arxiv.org/abs/1809.10632}, +} @Article{heatmaply2017, author = {{Galili} and {Tal} and {O'Callaghan} and {Alan} and {Sidi} and {Jonathan} and {Sievert} and {Carson}}, title = {{heatmaply}: an R package for creating interactive cluster heatmaps for online publishing}, @@ -1839,3 +1854,14 @@ @article{RS_2020_John volume = {12}, number = {1} } + +@Article{Pebesma_2005_sp, + author = {Edzer J. Pebesma and Roger S. Bivand}, + title = {Classes and methods for spatial data in {R}}, + journal = {R News}, + year = {2005}, + volume = {5}, + number = {2}, + pages = {9--13}, + url = {https://cran.r-project.org/doc/Rnews/Rnews_2005-2.pdf}, +} diff --git a/dashboard/007-shiny-app/README.md b/dashboard/007-shiny-app/README.md index 9b68ce70a..0286ba9e9 100644 --- a/dashboard/007-shiny-app/README.md +++ b/dashboard/007-shiny-app/README.md @@ -1,23 +1,5 @@ -Old Faithful Geyser Data -美国怀俄明州黄石国家公园-老忠实间歇泉 -每隔一段时间就喷发,非常守时规律,表现得很老实,故而得名 +### Old Faithful Geyser Data -维基百科的介绍 -https://en.wikipedia.org/wiki/Old_Faithful -用这个数据集讲一个故事 - -数据集介绍 -https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/faithful.html - -更大范围的数据集 -http://www.stat.yale.edu/~jah49/Pictures_in_R/Fickle_Old_Faithful/OldFaithful.pdf - -如果只想获得数据集可以从这下载 -https://www.stat.cmu.edu/~larry/all-of-statistics/=data/faithful.dat - -MASS 的 geyser 数据集(299 条记录)比 Base R 自带的 faithful 数据集(272 条记录)多 27 条数据 - -把窗宽的概念、二维核密度估计的概念 - -一维的密度、直方图 +美国怀俄明州黄石国家公园-老忠实间歇泉,每隔一段时间就喷发,非常守时规律,表现得很老实,故而得名。 +https://en.wikipedia.org/wiki/Old_Faithful diff --git a/data-modeling.Rmd b/data-modeling.Rmd new file mode 100644 index 000000000..2e9b0a8d6 --- /dev/null +++ b/data-modeling.Rmd @@ -0,0 +1,5 @@ +# (PART) 数据建模 {-} + +# 介绍 {#chap-data-modeling .unnumbered} + +数据建模 diff --git a/data-product.Rmd b/data-product.Rmd new file mode 100644 index 000000000..55d063304 --- /dev/null +++ b/data-product.Rmd @@ -0,0 +1,5 @@ +# (PART) 数据产品 {-} + +# 介绍 {#chap-data-product .unnumbered} + +数据产品 diff --git a/data-visualization.Rmd b/data-visualization.Rmd index 5cd0852ed..92ad19cb8 100644 --- a/data-visualization.Rmd +++ b/data-visualization.Rmd @@ -48,7 +48,6 @@ ggplot(mpg, aes(displ, hwy)) + 故事源于一幅图片,我不记得第一次见到这幅图是什么时候了,只因多次在多个场合中见过,所以留下了深刻的印象,后来才知道它出自于一篇博文 --- [Using R packages and education to scale Data Science at Airbnb](https://medium.com/airbnb-engineering/using-r-packages-and-education-to-scale-data-science-at-airbnb),作者 Ricardo Bion 还在其 Github 上传了相关代码^[]。除此之外还有几篇重要的参考资料: 1. Pablo Barberá 的 [Data Visualization with R and ggplot2](https://github.com/pablobarbera/Rdataviz) -2. Kieran Healy 的新书 [Data Visualization: A Practical Introduction](https://kieranhealy.org/publications/dataviz/) 3. Matt Leonawicz 的新作 [mapmate](https://github.com/leonawicz/mapmate), 可以去其主页欣赏系列作品^[] 4. [tidytuesday 可视化挑战官方项目](https://github.com/rfordatascience/tidytuesday) 还有 [tidytuesday](https://github.com/abichat/tidytuesday) 5. [ggstatsplot](https://github.com/IndrajeetPatil/ggstatsplot) 可视化统计检验、模型的结果 @@ -362,7 +361,7 @@ sysfonts::font_add_google(name = "Noto Sans SC", family = "Noto Sans SC") ``` ::: {.rmdwarn data-latex="{警告}"} -在本书中,不要全局加载 showtext 包或调用 `showtext::showtext_auto()`,会和 extrafont 冲突,使得绘图时默认就只能使用 showtext 提供的字体。 +在本书中,不要全局加载 showtext 包或调用 `showtext::showtext_auto()`,会和 extrafont 冲突,使得绘图时默认就只能使用 showtext 提供的字体。extrafont 包提供的函数 `font_import()` 仅支持系统安装的 TrueType/Type1 字体 ::: ```{r font-in-ggplot, fig.width=6, fig.height=6, fig.showtext=TRUE, fig.cap="在 ggplot2 绘图系统中设置中英文字体", out.width="100%"} @@ -725,6 +724,24 @@ data.frame( theme_minimal() ``` +Noto Color Emoji 字体在 MacOS 上有问题,为了跨平台的便携性,提供 emojifont 包的例子,要引入更多的依赖。 + +```{r,eval=FALSE} +library(ggplot2) +library(emojifont) + +names <- c("smile", "school", "office", "blush", "smirk", "heart_eyes") +n <- length(names):1 +e <- sapply(names, emojifont::emoji) +dat <- data.frame(emoji_name = names, n = n, emoji = e, stringsAsFactors = F) + +ggplot(data = dat, aes(emoji_name, n)) + + geom_bar(stat = "identity") + + scale_x_discrete(breaks = dat$emoji_name, labels = dat$emoji) + + theme(axis.text.y = element_text(size = 20, family = "EmojiOne")) + + coord_flip() +``` + ## 配色 {#sec-colors} 配色真的是一门学问,有的人功力非常深厚,仅用黑白灰就可以创造出一个世界,如中国的水墨画,科波拉执导的《教父》,沃卓斯基姐妹执导的《黑客帝国》等。黑西装、白衬衫和黑领带是《黑客帝国》的经典元素,《教父》开场的黑西装、黑领结和白衬衫,尤其胸前的红玫瑰更是点睛之笔。导演将黑白灰和光影混合形成了层次丰富立体的画面,打造了一场视觉盛宴,无论是呈现在纸上还是银幕上都可以给人留下深刻的印象。正所谓食色性也,花花世界,岂能都是法印眼中的白骨!再说《红楼梦》里,芍药丛中,桃花树下,滴翠亭边,栊翠庵里,处处都是湘云、黛玉、宝钗、妙玉留下的四季诗歌。 @@ -785,14 +802,19 @@ barplot(1:8, col = gray_colors, border = NA) gray 与 grey 是一样的,类似 color 和 colour 的关系,可能是美式和英式英语的差别,且看 ```{r,echo=TRUE} -all.equal(col2rgb(paste0("gray", seq(100))), col2rgb(paste0("grey", seq(100)))) +all.equal( + col2rgb(paste0("gray", seq(100))), + col2rgb(paste0("grey", seq(100))) +) ``` `gray100` 代表白色,`gray0` 代表黑色,提取灰色调色板,去掉首尾部分是必要的 ```{r gray-colors, fig.cap="提取 10 种灰色做调色板"} -barplot(1:8, col = gray.colors(8, start = .3, end = .9), - main = "gray.colors function", border = NA) +barplot(1:8, + col = gray.colors(8, start = .3, end = .9), + main = "gray.colors function", border = NA +) ``` 首先选择一组合适的颜色,比如从桃色到梨色,选择6种颜色,以此为基础,可以借助 `grDevices::colorRampPalette()` 函数扩充至想要的数目,用 `graphics::rect()` 函数预览这组颜色配制的调色板 @@ -1042,7 +1064,7 @@ swatch(palette1)[[1]] 不同的颜色模式,从 RGB 到 HCL 的基本操作 -```{r,fig.cap="HCL调色",R.options=list(tidyverse.quiet = TRUE)} +```{r, fig.cap="HCL调色", R.options=list(tidyverse.quiet = TRUE)} # https://github.com/hadley/ggplot2-book hcl <- expand.grid(x = seq(-1, 1, length = 100), y = seq(-1, 1, length = 100)) |> subset(subset = x^2 + y^2 < 1) |> @@ -1093,7 +1115,7 @@ R 包 grDevices 提供 hcl 调色板[^hcl-palettes] 调制两个色板 [^hcl-palettes]: https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/index.html -```{r color-pal, fig.cap = "桃色至梨色的渐变"} +```{r color-pal, fig.cap = "桃色至梨色的渐变", fig.width=4, fig.height=4} # Colors from https://github.com/johannesbjork/LaCroixColoR color_pal <- c("#FF3200", "#E9A17C", "#E9E4A6", "#1BB6AF", "#0076BB", "#172869") n <- 16 @@ -1101,7 +1123,7 @@ more_colors <- (grDevices::colorRampPalette(color_pal))(n) scales::show_col(colours = more_colors) ``` -```{r fish-hsv-pal, fig.cap="Hue-Saturation-Value (HSV) color model"} +```{r fish-hsv-pal, fig.cap="Hue-Saturation-Value (HSV) 颜色模型", fig.width=4, fig.height=4} # colors in colortools from http://www.gastonsanchez.com/ fish_pal <- c( "#69D2E7", "#6993E7", "#7E69E7", "#BD69E7", @@ -2238,6 +2260,8 @@ data.table::wday(Sys.Date()) ```{r} library(gert) library(ggplot2) +git_config_set("user.name", "XiangyunHuang") +git_config_set("user.email", "xiangyunfaith@outlook.com") dat <- git_log(max = 1000) @@ -2553,6 +2577,8 @@ ggplot(cohort, aes(x = week, y = cohort, fill = value)) + scale_fill_binned(type = "viridis") ``` +留存是 [Cohort 分析](https://en.wikipedia.org/wiki/Cohort_analysis) 中的一种情况,还有转化等,首先 +定义你的问题,确定度量问题的指标,确定和问题相关的 Cohort (比如时间、空间和用户属性等关键的影响因素),然后数据处理、可视化获得 Cohort 分析结果,最后在实际决策和行动中检验分析结论。 ### 瀑布图 {#sec-ggplot2-waterfall} @@ -2597,6 +2623,17 @@ ggplot(balance) + theme_minimal() ``` + +```{r,eval=FALSE} +library(ggplot2) +# AtherEnergy/ggTimeSeries +# 个人收入,国家地区收入 +library(ggTimeSeries) # https://github.com/AtherEnergy/ggTimeSeries +dat <- data.frame(year = 2000:2021, dpc = 10:31) +ggplot(data = dat, aes(x = year, y = dpc)) + + stat_waterfall() +``` + ### 桑基图 {#sec-ggplot2-sankey} [ggalluvial](https://github.com/corybrunson/ggalluvial) diff --git a/data-wrangling.Rmd b/data-wrangling.Rmd new file mode 100644 index 000000000..26d6bda8a --- /dev/null +++ b/data-wrangling.Rmd @@ -0,0 +1,5 @@ +# (PART) 数据整理 {-} + +# 介绍 {#chap-data-wrangling .unnumbered} + +数据整理 diff --git a/document-elements.Rmd b/document-elements.Rmd new file mode 100644 index 000000000..9df58fca6 --- /dev/null +++ b/document-elements.Rmd @@ -0,0 +1,96 @@ +# 文档元素 {#chap-document-elements} + +knitr 将 R Markdown 文件转化为 Markdown 文件, Pandoc 可以将 Markdown 文件转化为 HTML5、Word、PowerPoint 和 PDF 等文档格式。 + +```{r rmarkdown-output, fig.ncol = 4, fig.show='hold',fig.cap="rmarkdown 支持的输出格式",out.width="15%",out.height="15%",echo=FALSE,fig.link="https://www.ardata.fr/img/illustrations"} +knitr::include_graphics(path = paste0("images/", + c("html5", "word", "powerpoint", "pdf"), ifelse(knitr::is_latex_output(), ".pdf", ".svg") +)) +``` + +rmarkdown 自 2014年09月17日在 CRAN 上发布第一个正式版本以来,逐渐形成了一个强大的生态系统,世界各地的开发者贡献各种各样的扩展功能,见图 \@ref(fig:rmarkdown-ecosystem) + +```{r rmarkdown-ecosystem,fig.width=8,fig.height=4,echo=FALSE,fig.cap="rmarkdown 生态系统"} +nomnoml::nomnoml(" +#stroke: #26A63A +#.box: fill=#8f8 dashed visual=note +#direction: down + +[knitr] -> [动态文档|rmarkdown] +[Pandoc] -> [动态文档|rmarkdown] +[Markdown] -> [动态文档|rmarkdown] +[动态文档] -> [书籍笔记|bookdown] +[动态文档] -> [静态网站|blogdown] +[动态文档] -> [幻灯片|xaringan] +[幻灯片] -> [PowerPoint|officedown] +[书籍笔记] -> [毕业论文|thesisdown] +[静态网站] -> [个人简历|pagedown] +[动态文档] -> [数据面板|flexdashboard] +[数据面板] --> [交互图形|plotly]") +``` + + +```{r rmarkdown-concept-map,fig.cap="R Markdown 概念图",echo=FALSE,out.width="75%"} +knitr::include_graphics(path = paste0( + "diagrams/rmarkdown", + ifelse(knitr::is_latex_output(), ".png", ".svg") +)) +``` + +## 控制选项 {#sec-sql-engine} + +[Using SQL in RStudio](https://irene.rbind.io/post/using-sql-in-rstudio/) + +```{r} +library(DBI) +conn <- DBI::dbConnect(RSQLite::SQLite(), + dbname = system.file("db", "datasets.sqlite", package = "RSQLite") +) +``` + +Base R 内置的数据集都整合进 RSQLite 的样例数据库里了, + +```{r} +dbListTables(conn) +``` + +随意选择 5 行数据记录,将结果保存到变量 iris_preview + +```{sql connection=conn, output.var="iris_preview"} +SELECT * FROM iris LIMIT 5; +``` + +查看变量 iris_preview 的内容 + +```{r} +iris_preview +``` + +结束后关闭连接 + +```{r} +dbDisconnect(conn = conn) +``` + +## 表格 {#sec-rmarkdown-table} + +**knitr** 的 `kable()` 函数提供了制作表格的基本功能 ,[flextable](https://github.com/davidgohel/flextable) 支持更加细粒度的表格定制功能。[beautifyR](https://github.com/mwip/beautifyR) 整理 Markdown 表格非常方便,[datapasta](https://github.com/MilesMcBain/datapasta) 快速复制粘贴 data.frame 和 tibble 类型的数据表格。[rpivotTable](https://github.com/smartinsightsfromdata/rpivotTable) 不更新了,[pivottabler](https://github.com/cbailiss/pivottabler) 在更新,内容似乎更好。[remedy](https://github.com/ThinkR-open/remedy) 提供了更加通用的 Markdown 写作功能,简化创作的技术难度。 + +## 流程图 {#sec-rmarkdown-uml} + +[nomnoml](https://github.com/rstudio/nomnoml) 流程图、思维导图 + +```{r,fig.width=8,fig.height=2} +nomnoml::nomnoml(" +#stroke: #26A63A +#.box: fill=#8f8 dashed visual=note +#direction: down + +[Sweave-test-1.Rnw] -> utils::Sweave() [Sweave-test-1.tex|Sweave-test-1-006.pdf|Sweave-test-1-007.pdf] +[Sweave-test-1.Rnw] -> utils::Stangle() [Sweave-test-1.R] +[Sweave-test-1.tex] -> tools::texi2pdf() [Sweave-test-1.pdf] +[Sweave-test-1.tex] -> tools::texi2dvi() [Sweave-test-1.dvi] +") +``` + + diff --git a/dynamic-documents.Rmd b/dynamic-documents.Rmd index 781b2edb7..3b75acafc 100644 --- a/dynamic-documents.Rmd +++ b/dynamic-documents.Rmd @@ -1,4 +1,6 @@ -# 动态文档 {#chap-dynamic-documents} +# (PART) 动态文档 {-} + +# 介绍 {#chap-dynamic-documents .unnumbered} @@ -33,397 +35,3 @@ knitr::include_graphics(path = "images/rmarkdown.png") - 借助 flipbookr 在 xaringan 制作的幻灯片里逐行展示代码执行的效果,特别适合用于 ggplot2 的教学 - 制作 note/tips 等自定义块 -## 文档元素 {#sec-rmarkdown} - -knitr 将 R Markdown 文件转化为 Markdown 文件, Pandoc 可以将 Markdown 文件转化为 HTML5、Word、PowerPoint 和 PDF 等文档格式。 - -```{r rmarkdown-output, fig.ncol = 4, fig.show='hold',fig.cap="rmarkdown 支持的输出格式",out.width="15%",out.height="15%",echo=FALSE,fig.link="https://www.ardata.fr/img/illustrations"} -knitr::include_graphics(path = paste0("images/", - c("html5", "word", "powerpoint", "pdf"), ifelse(knitr::is_latex_output(), ".pdf", ".svg") -)) -``` - -rmarkdown 自 2014年09月17日在 CRAN 上发布第一个正式版本以来,逐渐形成了一个强大的生态系统,世界各地的开发者贡献各种各样的扩展功能,见图 \@ref(fig:rmarkdown-ecosystem) - -```{r rmarkdown-ecosystem,fig.width=8,fig.height=4,echo=FALSE,fig.cap="rmarkdown 生态系统"} -nomnoml::nomnoml(" -#stroke: #26A63A -#.box: fill=#8f8 dashed visual=note -#direction: down - -[knitr] -> [动态文档|rmarkdown] -[Pandoc] -> [动态文档|rmarkdown] -[Markdown] -> [动态文档|rmarkdown] -[动态文档] -> [书籍笔记|bookdown] -[动态文档] -> [静态网站|blogdown] -[动态文档] -> [幻灯片|xaringan] -[幻灯片] -> [PowerPoint|officedown] -[书籍笔记] -> [毕业论文|thesisdown] -[静态网站] -> [个人简历|pagedown] -[动态文档] -> [数据面板|flexdashboard] -[数据面板] --> [交互图形|plotly]") -``` - - -```{r rmarkdown-concept-map,fig.cap="R Markdown 概念图",echo=FALSE,out.width="75%"} -knitr::include_graphics(path = paste0( - "diagrams/rmarkdown", - ifelse(knitr::is_latex_output(), ".png", ".svg") -)) -``` - -### 控制选项 {#subsec-sql-engine} - -[Using SQL in RStudio](https://irene.rbind.io/post/using-sql-in-rstudio/) - -```{r} -library(DBI) -conn <- DBI::dbConnect(RSQLite::SQLite(), - dbname = system.file("db", "datasets.sqlite", package = "RSQLite") -) -``` - -Base R 内置的数据集都整合进 RSQLite 的样例数据库里了, - -```{r} -dbListTables(conn) -``` - -随意选择 5 行数据记录,将结果保存到变量 iris_preview - -```{sql connection=conn, output.var="iris_preview"} -SELECT * FROM iris LIMIT 5; -``` - -查看变量 iris_preview 的内容 - -```{r} -iris_preview -``` - -结束后关闭连接 - -```{r} -dbDisconnect(conn = conn) -``` - -### 表格 {#subsec-rmarkdown-table} - -**knitr** 的 `kable()` 函数提供了制作表格的基本功能 ,[flextable](https://github.com/davidgohel/flextable) 支持更加细粒度的表格定制功能。[beautifyR](https://github.com/mwip/beautifyR) 整理 Markdown 表格非常方便,[datapasta](https://github.com/MilesMcBain/datapasta) 快速复制粘贴 data.frame 和 tibble 类型的数据表格。[rpivotTable](https://github.com/smartinsightsfromdata/rpivotTable) 不更新了,[pivottabler](https://github.com/cbailiss/pivottabler) 在更新,内容似乎更好。[remedy](https://github.com/ThinkR-open/remedy) 提供了更加通用的 Markdown 写作功能,简化创作的技术难度。 - -### 流程图 {#subsec-rmarkdown-uml} - -[nomnoml](https://github.com/rstudio/nomnoml) 流程图、思维导图 - -```{r,fig.width=8,fig.height=2} -nomnoml::nomnoml(" -#stroke: #26A63A -#.box: fill=#8f8 dashed visual=note -#direction: down - -[Sweave-test-1.Rnw] -> utils::Sweave() [Sweave-test-1.tex|Sweave-test-1-006.pdf|Sweave-test-1-007.pdf] -[Sweave-test-1.Rnw] -> utils::Stangle() [Sweave-test-1.R] -[Sweave-test-1.tex] -> tools::texi2pdf() [Sweave-test-1.pdf] -[Sweave-test-1.tex] -> tools::texi2dvi() [Sweave-test-1.dvi] -") -``` - -## 便携式文档 {#sec-portable-document} - -### 文档汉化 {#subsec-chinese-document} - -从 R Markdown 到 beamer 幻灯片,如何迁移 LaTeX 模版 - -默认的 PDF 文档 [PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/pdf-default.Rmd) - -详见[PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/pdf-document.Rmd) - -### 添加水印 {#subsec-draft-watermark} - -[draftwatermark](https://github.com/callegar/LaTeX-draftwatermark) - -### 双栏排版 {#subsec-two-column} - -普通单栏排版改为双栏排版,只需添加文档类选项 `"twocolumn"`,将 YAML 元数据中的 - -```yaml -classoption: "UTF8,a4paper,fontset=adobe,zihao=false" -``` - -变为 - -```yaml -classoption: "UTF8,a4paper,fontset=adobe,zihao=false,twocolumn" -``` - -其中,参数 `UTF8` 设定文档编码类型, `a4paper` 设置版面为 A4 纸大小,`fontset=adobe` 指定中文字体为 Adobe 字体,`zihao=false` 不指定字体大小,使用文档类 ctexart 默认的字号, - -### 参数化报告 {#subsec-parameterized-reports} - -[参数化文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/parameterized-document.Rmd) - -进一步将文档类型做成参数化,实现在运行时自由选择,只需将如下两行替换掉上述一行 - -```yaml -params: - classoption: twocolumn -classoption: "`r knitr::inline_expr('params$classoption')`" -``` - -如果想要双栏的排版风格,编译时传递 documentclass 参数值,覆盖掉默认的参数值即可 - -```{r,eval=FALSE} -rmarkdown::render( - input = "examples/pdf-document.Rmd", - params = list(classoption = c("twocolumn")) -) -``` - -### 学术幻灯片 {#sec-beamer-slides} - -beamer 幻灯片也是一种 PDF 文档 [PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/beamer-verona.Rmd) - -Dirk Eddelbuettel 将几个大学的 beamer 幻灯片转化成 R Markdown 模板,收录在 [binb](https://github.com/eddelbuettel/binb) 包里,方便调用。伊利诺伊大学的 [James J Balamuta](https://thecoatlessprofessor.com/) 在 R Markdown 基础上专门为自己学校开发了一套的幻灯片模版,全部打包在 [uiucthemes](https://github.com/illinois-r/uiucthemes) 包里。 - -[komaletter](https://github.com/rnuske/komaletter) 用 Markdown 写信件 - -[memor](https://github.com/hebrewseniorlife/memor) `memor::pdf_memo()` - -[hrbrthemes](http://github.com/hrbrmstr/hrbrthemes) 提供两个文档模版 `hrbrthemes::ipsum_pdf()` 和 `hrbrthemes::ipsum()` - -此汉风主题由 [林莲枝](https://github.com/liantze/pgfornament-han/) 开发,LaTeX 宏包已发布在 [CTAN](https://www.ctan.org/pkg/pgfornament-han) 上,使用此幻灯片主题需要将相关的 LaTeX 宏包一块安装。 - -```bash -tlmgr install pgfornament pgfornament-han needspace xpatch -``` - -### 文档模版 {#subsec-document-template} - -字体设置 - -:::::: {.columns} -::: {.column width="47.5%" data-latex="{0.475\textwidth}"} - -```yaml ---- -output: - pdf_document: - extra_dependencies: - DejaVuSansMono: - - scaled=0.9 - DejaVuSerif: - - scaled=0.9 - DejaVuSans: - - scaled=0.9 ---- -``` - -::: -::: {.column width="5%" data-latex="{0.05\textwidth}"} -\ - -::: -::: {.column width="47.5%" data-latex="{0.475\textwidth}"} - -```yaml ---- -output: - pdf_document: - extra_dependencies: - sourcecodepro: - - scale=0.85 - sourceserifpro: - - rmdefault - sourcesanspro: - - sfdefault ---- -``` - -::: -:::::: - -### 引用文献 {#subsec-cite-doi} - -[Getting started with Zotero, Better BibTeX, and RMarkdown](https://fishandwhistle.net/post/2020/getting-started-zotero-better-bibtex-rmarkdown/) - -[^doi]: - -[knitcitations](https://github.com/cboettig/knitcitations) 包可以根据文献数字对象标识符(英文 Digital Object Identifier,简称 DOI)生成引用,以文章《A Probabilistic Grammar of Graphics》[@Pu_2020_Grammar] 为例,其 DOI 为 `10.1145/3313831.3376466`,总之, DOI 就像是文章的身份证,是一一对应的关系[^doi]。 - -```{r,eval=FALSE} -library(knitcitations) -citep(x ='10.1145/3313831.3376466') -``` - -``` -[1] "(Pu and Kay, 2020)" -``` - -在表格的格子中引用参考文献 - -```{r, results='asis'} -data.frame( - author = c("Yihui Xie", "Yihui Xie", "Yihui Xie"), - citation = c("[@xie2019]", "[@xie2015]", "[@xie2016]") -) |> - knitr::kable(format = "pandoc") -``` - -[citr](https://github.com/crsh/citr) 包提供了快速查找参考文献的 RStudio 插件,不用去原始文献库 `*.bib` 搜索查找,也会自动生成引用,非常方便,极大地提高了工作效率。 **citr** 还支持集成 [Zotero](https://www.zotero.org/) 文献管理软件,可以直接从 Zotero 中导入参考文献数据库。[rbbt](https://github.com/paleolimbot/rbbt) 包也提供了类似的功能,只要系统安装 Zotero 软件及其插件 [Better Bibtex for Zotero connector](https://retorque.re/zotero-better-bibtex/)。 - -### 自定义块 {#subsec-custom-blocks} - -```r -tinytex::tlmgr_install(c('awesomebox', 'fontawesome5')) -``` - -安装 [awesomebox](https://ctan.org/pkg/awesomebox) 包,开发仓库在 ,这个 LaTeX 宏包的作用是提供几类常用的块,比如提示、注意、警告等 - -::: {.noteblock data-latex="注意"} -这是注意 -::: - -::: {.tipblock data-latex="提示"} -这是提示信息 -::: - -::: {.warningblock data-latex="警告"} -这是警告信息 -::: - -::: {.importantblock data-latex="重要"} -这是重要信息 -::: - -## 网页文档 {#sec-web-document} - -丘怡轩开发的 [prettydoc](https://github.com/yixuan/prettydoc) 包提供了一系列模版,方便快速提高网页逼格。另有 Atsushi Yasumoto 开发的 [minidown](https://github.com/atusy/minidown) 包非常轻量,但是常用功能都覆盖了。 - - - -谢益辉开发的 [xaringan](https://github.com/yihui/xaringan) 用于制作网页幻灯片, -[xaringanthemer](https://github.com/gadenbuie/xaringanthemer) 为 xaringan 提供主题定制, -[xaringanExtra](https://github.com/gadenbuie/xaringanExtra) 在 xaringan 之上提供各种功能扩展, -[xaringanBuilder](https://github.com/jhelvy/xaringanBuilder) 为 xaringan 提供多种输出格式。 - - -## 编写书籍 {#sec-bookdown} - -此外, [ElegantTufteBookdown](https://github.com/XiangyunHuang/ElegantTufteBookdown) 项目提供了 tufte 风格的书籍模板,本书配套的仓库目录 `examples/` 下准备了一系列常用模板。 - -## 个人网站 {#sec-blogdown} - -## 微软文档 {#sec-office-document} - -[docxtools](https://github.com/graphdr/docxtools)、[officer](https://github.com/davidgohel/officer) 和 [officedown](https://github.com/davidgohel/officedown) 大大扩展了 rmarkdown 在制作 Word/PPT 方面的功能。 - -本节探索 Markdown + Pandoc 以 Word 格式作为最终交付的可能性。R Markdown 借助 Pandoc 将 Markdown 转化为 Word 文档,继承自 Pandoc 的扩展性, R Markdown 也支持自定义 Word 模版,那如何自定义呢?首先,我们需要知道 Pandoc 内建的 Word 模版长什么样子,然后我们依样画葫芦,制作适合实际需要的模版。获取 Pandoc 2.10.1 自带的 Word 和 PPT 模版,只需在命令行中执行 - -```{bash,eval=FALSE} -# DOCX 模版 -pandoc -o custom-reference.docx --print-default-data-file reference.docx -# PPTX 模版 -pandoc -o custom-reference.pptx --print-default-data-file reference.pptx -``` - -这里其实是将 Pandoc 自带的 docx 文档 reference.docx 拷贝一份到 custom-reference.docx,而后将 custom-reference.docx 文档自定义一番,但仅限于借助 MS Word 去自定义样式。 Word 文档的 YAML 元数据定义详情见 ,如何深度自定义文档模版见 -,其它模版见 GitHub 仓库 [pandoc-templates](https://github.com/jgm/pandoc-templates)。这里提供一个[Word 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/docx-document.Rmd)供读者参考。**bookdown** 提供的函数 `word_document2()` 相比于 **rmarkdown** 提供的 `word_document()` 支持图表的交叉引用,更多细节详见帮助 `?bookdown::word_document2`。 - -::: {.rmdnote data-latex="{注意}"} -R Markdown 文档支持带编号的 Word 文档格式输出要求 Pandoc 版本 2.10.1 及以上, rmarkdown 版本 2.4 及以上。 -::: - -## 发送邮件 {#sec-send-emails} - -[^blastula-group-emails]: - -[emayili](https://github.com/datawookie/emayili) 是非常轻量的实现邮件发送的 R 包,其它功能类似的 R 包有 [blastula](https://github.com/rich-iannone/blastula) [mailR](https://github.com/rpremraj/mailR)。Rahul Premraj 基于 rJava 开发的 [mailR](https://github.com/rpremraj/mailR) 虽然还未在 CRAN 上正式发布,但是已得到很多人的关注,也被广泛的使用,目前作者已经不维护了,继续使用有一定风险。 RStudio 公司 Richard Iannone 新开发的 [blastula](https://github.com/rich-iannone/blastula) 扔掉了 Java 的重依赖,更加轻量化、现代化,支持发送群组邮件[^blastula-group-emails]。 [curl](https://github.com/jeroen/curl) 包提供的函数 `send_mail()` 本质上是在利用 [curl](https://curl.haxx.se/) 软件发送邮件,举个例子,邮件内容如下: - -``` -From: "黄湘云" <邮箱地址> -To: "黄湘云" <邮箱地址> -Subject: 测试邮件 - -你好: - -这是一封测试邮件! -``` - -将邮件内容保存为 mail.txt 文件,然后使用 curl 命令行工具将邮件内容发出去。 - -```{bash, eval=FALSE} -curl --url 'smtp://公司邮件服务器地址:开放的端口号' \ - --ssl-reqd --mail-from '发件人邮箱地址' \ - --mail-rcpt '收件人邮箱地址' \ - --upload-file data/mail.txt \ - --user '发件人邮箱地址:邮箱登陆密码' -``` - -::: {.rmdnote data-latex="{注意}"} -Gmail 出于安全性考虑,不支持这种发送邮件的方式,会将邮件内容阻挡,进而接收不到邮件。 -::: - -下面以 blastula 包为例怎么支持 Gmail/Outlook/QQ 等邮件发送,先安装系统软件依赖,CentOS 8 上安装依赖 - -```bash -sudo dnf install -y libsecret-devel libsodium-devel -``` - -然后安装 [**keyring**]() 和 [**blastula**]() - -```{r, eval=FALSE} -install.packages(c("keyring", "blastula")) -``` - -接着配置邮件帐户,这一步需要邮件账户名和登陆密码,配置一次就够了,不需要每次发送邮件的时候都配置一次 - -```{r, eval=FALSE} -library(blastula) -create_smtp_creds_key( - id = "outlook", - user = "xiangyunfaith@outlook.com", - provider = "outlook" -) -``` - -第二步,准备邮件内容,包括邮件主题、发件人、收件人、抄送人、密送人、邮件主体和附件等。 - -```{r, eval=FALSE} -attachment <- "data/mail.txt" # 如果没有附件,引号内留空即可。 -# 这个Rmd文件渲染后就是邮件的正文,交互图形和交互表格不适用 -body <- "examples/html-document.Rmd" -# 渲染邮件内容,生成预览 -email <- render_email(body) |> - add_attachment(file = attachment) -email -``` - -最后,发送邮件 - -```{r, eval=FALSE} -smtp_send( - from = c("张三" = "xxx@outlook.com"), # 发件人 - to = c("李四" = "xxx@foxmail.com", - "王五" = "xxx@gmail.com"), # 收件人 - cc = c("赵六" = "xxx@outlook.com"), # 抄送人 - subject = "这是一封测试邮件", - email = email, - credentials = creds_key(id = "outlook") -) -``` - -密送人实现群发单显,即一封邮件同时发送给多个人,每个收件人只能看到发件人地址而看不到其它收件人地址。 - - -## 工作流 {#sec-drake} - -[drake](https://github.com/ropensci/drake) 一站式可重复性研究工作空间打造者,用户手册 和学习材料 - - - -## 运行环境 {#session-dynamic-documents} - -```{r} -sessionInfo() -``` diff --git a/generalized-additive-models.Rmd b/generalized-additive-models.Rmd index d7378eb72..67cc34dc9 100644 --- a/generalized-additive-models.Rmd +++ b/generalized-additive-models.Rmd @@ -13,7 +13,7 @@ library(splines2) 广义可加模型(Generalized Additive Models,简称 GAM 模型)在 R 中的实现 - **gam** [@gam] 实现了《Statistical Models in S》[@Chambers_1992_Statistical] 中描述的所有广义可加模型 -- **mgcv** [@Wood_2017_Generalized] +- **mgcv** [@Wood_2017_Generalized] [**mgcViz**](https://github.com/mfasiolo/mgcViz)[@mgcViz] 模型开发和结果呈现 - **gss** [@Gu_2014_jss] - **mda** [@mda] @@ -47,9 +47,9 @@ demo("smooth", ask = FALSE) ``` -```{r,results='hide',eval=FALSE} +```{r, eval=FALSE, fig.width=4, fig.height=4} library(ISLR) -Income <- read.table("http://faculty.marshall.usc.edu/gareth-james/ISL/Income2.csv", sep = ",", header = TRUE) +Income <- read.table("https://www.statlearning.com/s/Income2.csv", sep = ",", header = TRUE) mod <- loess(Income ~ Education + Seniority, data = Income) x_Edu <- seq(min(Income$Education), max(Income$Education), length.out = 30) @@ -60,17 +60,16 @@ pred_func <- function(x1, x2) { } y_income <- outer(x_Edu, x_Sen, pred_func) - +# 绘图 point_pmat <- persp( - x = x_Edu, y = x_Sen, z = y_income, theta = 45, phi = 35, col = "lightblue", + x = x_Edu, y = x_Sen, z = y_income, + theta = 45, phi = 35, col = "lightblue", border = "blue", xlab = "Years of Education", ylab = "Seniority", zlab = "Income" ) - points(trans3d(x = Income$Education, y = Income$Seniority, z = Income$Income, pmat = point_pmat), col = "red", pch = 16) - for (i in 1:dim(Income)[1]) { z <- seq(Income$Income[i], pred_func(Income$Education[i], Income$Seniority[i]), length.out = 10) lines(trans3d(x = Income$Education[i], y = Income$Seniority[i], z = z, pmat = point_pmat)) diff --git a/generalized-linear-models.Rmd b/generalized-linear-models.Rmd index 6fb117d25..980f5ae43 100644 --- a/generalized-linear-models.Rmd +++ b/generalized-linear-models.Rmd @@ -1,12 +1,14 @@ # 广义线性模型 {#chap-generalized-linear-models} ```{r,include=FALSE} -library(ggplot2) -library(cowplot) +library(ggplot2) # 图形 +library(cowplot) # 图形组合布局 library(MASS) # 惩罚极大似然估计 +library(stats4) library(bbmle) # 极大似然估计 library(nnet) # 多项分布 library(survival) # 生存分析 +library(Matrix) library(lme4) # 广义线性和线性混合效应模型 library(glmnet) # Lasso / 弹性网络模型 ``` @@ -115,7 +117,7 @@ str(esoph) ``` ```{r esoph-data,fig.cap="吸烟喝酒和食道癌的关系"} -p1 <- ggplot(data = esoph, aes(x = agegp, y = ncases / ncontrols, color = agegp)) + +p1 <- ggplot(data = esoph, aes(x = agegp, y = ncases / (ncases + ncontrols), color = agegp)) + geom_boxplot(show.legend = FALSE) + geom_jitter(show.legend = FALSE) + theme_minimal() diff --git a/graphics-foundations.Rmd b/graphics-foundations.Rmd index e66cb814d..3145bdb2d 100644 --- a/graphics-foundations.Rmd +++ b/graphics-foundations.Rmd @@ -1486,16 +1486,6 @@ boxplotdbl: Double Box Plot for Two-Axes Correlation. Correlation chart of two s [复合箱线图](https://tomizonor.wordpress.com/2013/03/15/double-box-plot/) - -```{r boxplot} -A <- c( - 79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97, - 80.05, 80.03, 80.02, 80, 80.02 -) -B <- c(80.02, 79.94, 79.98, 79.97, 79.97, 80.03, 79.95, 79.97) -boxplot(A, B) -``` - ```{r iris,fig.cap="安德森的鸢尾花数据",fig.asp=1} with(data = iris, { op <- par(mfrow = c(2, 2), mar = c(4, 4, 2, .5)) @@ -1510,49 +1500,30 @@ with(data = iris, { 箱线图的花样也很多 -```{r} -boxplot(longley$Unemployed) -# 水平放置 -boxplot(longley$Unemployed, - horizontal = TRUE, - col = "pink", - main = "" -) -``` - -```{r} +```{r ,fig.cap="箱线图", fig.subcap=c("垂直放置", "水平放置"), fig.width=4, fig.height=3, fig.ncol=1} data(InsectSprays) -boxplot(count ~ spray, - data = InsectSprays, - col = "pink", - xlab = "Spray", - ylab = "Count", - main = "" +par(mar = c(4, 4, .5, .5)) +boxplot( + data = InsectSprays, count ~ spray, + col = "gray", xlab = "Spray", ylab = "Count" ) -``` -水平放置 - -```{r} -boxplot(count ~ spray, - data = InsectSprays, - col = "pink", - horizontal = TRUE, - las = 1, # Horizontal labels - xlab = "Count", - ylab = "Spray", - main = "" +boxplot( + data = InsectSprays, count ~ spray, + col = "gray", horizontal = TRUE, + las = 1, xlab = "Count", ylab = "Spray" ) ``` + Notched Boxplots -```{r} +```{r, fig.width=4, fig.height=4} set.seed(1234) n <- 8 g <- gl(n, 100, n * 100) # n水平个数 100是重复次数 x <- rnorm(n * 100) + sqrt(as.numeric(g)) -boxplot(split(x, g), col = terrain.colors(n), notch = TRUE) +boxplot(split(x, g), col = gray.colors(n), notch = TRUE) title( main = "Notched Boxplots", xlab = "Group", font.main = 4, font.lab = 1 @@ -1652,36 +1623,48 @@ box(col = "gray") ### 函数图 {#function} -```{r,fig.asp=1,out.width="65%",fig.width=6.5} -## First zero on the critical line s = 0.5 + i t -x <- seq(0, 20, len = 1001) -z <- 0.5 + x * 1i -fr <- Re(pracma::zeta(z)) -fi <- Im(pracma::zeta(z)) -fa <- abs(pracma::zeta(z)) -plot(x, fa, - type = "n", xlim = c(0, 20), ylim = c(-1.5, 2.5), - xlab = "Imaginary part (on critical line)", - ylab = "Function value", - main = "Riemann's Zeta Function along the critical line" +```{r bessel-function, dev='tikz', fig.cap="贝塞尔函数", fig.show='hold', out.width="65%", fig.width=4, fig.height=4, cache=TRUE, fig.process=to_png} +x0 <- 2^(-20:10) +nus <- c(0:5, 10, 20) +x <- seq(0, 4, length.out = 501) + +plot(x0, x0^-8, + frame.plot = TRUE, # 添加绘图框 + log = "xy", # x 和 y 轴都取对数尺度 + axes = FALSE, # 去掉坐标轴 + xlab = "$u$", ylab = "$\\mathcal{K}_{\\kappa}(u)$", # 设置坐标轴标签 + type = "n", # 清除绘图区域的内容 + ann = TRUE, # 添加标题 x和y轴标签 + panel.first = grid() # 添加背景参考线 ) -grid() -lines(x, fr, col = "blue") -lines(x, fi, col = "darkgreen") -lines(x, fa, col = "red", lwd = 2) -points(14.1347, 0, col = "darkred") -legend(0, 2.4, c("real part", "imaginary part", "absolute value"), - lty = 1, lwd = c(1, 1, 2), col = c("blue", "darkgreen", "red") + +axis(1, + at = 10^seq(from = -8, to = 2, by = 2), + labels = paste0("$\\mathsf{10^{", seq(from = -8, to = 2, by = 2), "}}$") +) +axis(2, + at = 10^seq(from = -8, to = 56, by = 16), + labels = paste0("$\\mathsf{10^{", seq(from = -8, to = 56, by = 16), "}}$"), las = 1 +) + +for (i in seq(length(nus))) { + lines(x0, besselK(x0, nu = nus[i]), col = hcl.colors(9)[i], lwd = 2) +} +legend("topright", + legend = paste0("$\\kappa=", rev(nus), "$"), + col = hcl.colors(9, rev = T), lwd = 2, cex = 1 ) ``` + + 还有 eta 函数和 gammaz 函数 ### 马赛克图 {#plot-mosaic} 马赛克图 mosaicplot -```{r,fig.cap="马赛克图",fig.asp=1} +```{r, fig.cap="马赛克图", fig.width=5, fig.height=5} plot(HairEyeColor, col = "lightblue", border = "white", main = "") ``` @@ -1787,7 +1770,8 @@ persp(x1, x2, z, ```{r} library(lattice) -bwplot(voice.part ~ height, data = singer) +# plot(data = InsectSprays, count ~ spray) +bwplot(count ~ spray, data = InsectSprays) ``` @@ -1841,13 +1825,13 @@ plot(Orange, outer = ~1, key = list( space = "right", title = "Tree", cex.title = 1, - lines = list(lty = 1, col = hcl.colors(5)), - # points = list(pch = 1, col = hcl.colors(5)), + lines = list(lty = 1, col = gray.colors(5)), + # points = list(pch = 1, col = gray.colors(5)), text = list(c("3", "1", "5", "2", "4")) ), par.settings = list( - # plot.line = list(col = hcl.colors(5), border = "transparent"), - # plot.symbol = list(col = hcl.colors(5), border = "transparent"), + # plot.line = list(col = gray.colors(5), border = "transparent"), + # plot.symbol = list(col = gray.colors(5), border = "transparent"), strip.background = list(col = "white"), strip.border = list(col = "black") ) @@ -1870,7 +1854,10 @@ barchart(Claims / Holders ~ Age | Group, barchart(Claims / Holders ~ Age | Group, groups = District, data = Insurance, main = "Claims frequency", - auto.key = list(space = "top", columns = 4, title = "District", cex.title = 1) + auto.key = list( + space = "top", columns = 4, + title = "District", cex.title = 1 + ) ) ``` @@ -1901,23 +1888,22 @@ trellis.par.set(my.settings) library(MASS) library(lattice) -myColours <- terrain.colors(6) - barchart(Claims / Holders * 100 ~ Age | Group, groups = District, data = Insurance, origin = 0, main = "Motor insurance claims frequency", xlab = "Age", ylab = "Claims frequency %", scales = list(alternating = 1), auto.key = list( - space = "top", columns = 4, points = FALSE, rectangles = TRUE, + space = "top", columns = 4, + points = FALSE, rectangles = TRUE, title = "District", cex.title = 1 ), par.settings = list( - superpose.polygon = list(col = myColours[2:5], border = "transparent"), - strip.background = list(col = myColours[6]), + superpose.polygon = list(col = gray.colors(4), border = "transparent"), + strip.background = list(col = "gray80"), strip.border = list(col = "black") ), - par.strip.text = list(col = "white", font = 2), + par.strip.text = list(col = "gray40", font = 2), panel = function(x, y, ...) { panel.grid(h = -1, v = 0) panel.barchart(x, y, ...) diff --git a/images/cover.png b/images/cover.png new file mode 100644 index 0000000000000000000000000000000000000000..b12cdd7a341cd10f6926bae93d8a33cf2064ff3a GIT binary patch literal 31621 zcmeFZ_dnJD|3A*@;2@kb3dNDA6iP-$96J?FWF0AcB-wk9LMV}4NWfw zLPENXA&0*VYU~V>kl;v^#Jgz|2)pWH^N`Wn^W#ld{kU7kFr161%KcgCFh;O489bN>Ln1jRc z1xd)YNbiV_-EIy^iP^b0x847tIN>seYE3O}Gj?jccfx)u)+v%tr}+4O44wpq^&mmx zNJug0%dgnHM6NgI@c!$svLuxa(t8&m*J7ecyhM!Gazy`i4VvHRf1ZNJ8KW=*ulgIt zk!S8b9!re<&u<)w=POE_gSQ=jjQ8IcBNyQp9+Cg+1}|=pA)202bK3um4i5ui{{II8 zE7E#!eb&y}cSM_Jdsg8LJ@nYEd zWsz&~GU$K>Zgji>#}Fr68$zfy{B!MJ`|(VJIpr$Uc>h_DAuL=W`?Z1rQ=~#tIbH0Z zMfb;?OyPlJ+W#M8CQuFJ8Z}>TPf^aTa`-hYzp>hDIo4yA*)-+I=0$=wJpuRG{9TR^ zH1Qx&@#&E;GHRx}w+U`M!K@+?6zne7{pi>Ur`IgYHWb`8+Y>IEn7rRA-(A~vS*8<~ ztvChC^0v?O+@Ilva*<}D=lecdzmfH)6|fwTaGZ#c&Gs;N<{CCVE156T zQV2S1MfQ7b!1>zF&x9Q%?yE0x&Z{%h3!4sN+1;Wrk_l}u>Axc(Beg`&XWy^Y@5wXC z(W@qZubvs-9i1W){^G@#<)7{Ry;B=Y6HN&}2j>UNb?xT*9uGAQpIyM9r(h-11F`OO ze@-{w!3Bkdx!%I&(V4Q<9y8S=2~Ja|OulEmmRN3)jm&o4{P{NcWal@3+v!dY2OSB= z@1tC>L6HxZIwxTTzNqWE8qCapd;dH>`3qE+}nz_d7Pm8JY#(cgY$)HEE+e7yTIhTSi`(M!giA}~P5enpmt`OM2fnE01b5i{He=?JH*;^HrO3F= zY>%qne&%Ddy@i^?-ii@kYhGZOzkDla=6kmH6|)BkvS~{`aAxTh9Sz6% z-F{Jvx~ykC9)HRz@>TAk87q&Akg78O@#&NN{34N&%!v79C{&!TG!sdT>x>pOxv82i zy{o-crdjwPxR8Tn&n3mQy|>U;ugOpw{t`4rpn3d+z$>bhEb@lJ-$E9rh=Ox>WUX!l zmPQV2`+%oDi!5F~b$oW5$Bwt`YHMo3!i^eFZ!%KMHwEw?-T(3*ozi#b2TF7FzZ_f& zPf0eRUU%_}SK`%8F~LT+Hz3jCIz1_xe*%TV)**q*?6!mhBWmJn-1AyC=f>Oji$V7}vO1R5r7$`RQoJMuYkmcuh~Mac-zRLTe)`Pq`S2*ZB5w-1=z=9N!FejH9>w@5lT@-6xOX2#9|H#(%wWWGBKQ6N=)zQGZ_Pa81C$gN;+bPSN)`d=6VLIu z3lJiQwCnwFSawphrHgsHKyHeFv1**mE#?nP^;a!gQsjEVf{}4l90bqC1Tpg;&nARz zv8!ypxE5=IU9Q+iC->EOZLU9M$~#5S*mU1M#J_n(o(CU1dX=Oy=DYx5{k&&mpeg+c z6{4y2W0_Yb&KvD-uUUJhV*`lPo+OmmA2coR%_&h<2k;A@<%PE#Et>k@@qOq}Xe8Wi z_ha}rPP40>EsN2=0#D;T6%P>`UD?1<=Z`nEDGZG+>CUh)Te_KYM(dsgh35h|-|Dq< z;y!qN*v7EeS3Bftp3G^!lXJXfOZ$~^ii181?> zZ0?PLFV9ajEc~cmIu~OPGxosX9QHlp*7C&n!Q-sxX)WEfqB!RI`lm)$XM0afccv%Z zlXXyha#R|}59>+H`gIW9rUc_uDL<5Bre&|bWMWw|TOgBCzoDa*?O99|O&TG?py9-_ z#$M^hqKDy71&3G!oG<@9SF+f6zWI}yeIcJ=0Uv*V*VM6^L0Oy&Ja)mgk(L9?4BMoC z^47W5;hI4CThw#JC6%9DnYzcE)mZtoi*VFTXRyktcOf+Vgm|d@cCcFry@2qZdWu&ErZ~tDS=BhtvyDv}{cLU}o_bc}T zdJD$6(x9uPVRj`90*{JeQ}l~)X4UDQg9dS8w$FU24{<-BBZsNg!0>n_t<`)y3SmD6 ziZkB%?QV9JN9dH;aXD$EZ~A>Cs(s_ExQBg+3_&#VE*9MfXTi2hB;$@Df9m^-V`F2F zb<|S7dN-J5#A0VW@B;9hFAt4WF%Gb8({Fm8m@ZFemhcd# z)bT;Jt;p)WMuBG-0b@xmG_qzTg+9)$4za z)EcbLbho9d^G9B-X2RfchH%eI((@P5GB9hJFEOq2V=b>`7aMeU+P)3MiC7YrsTnSe zJUuF|Kk_-aE!XH;)OjNn&)i>PXxtEZlFVxZRce^y$kEE$cSLufa{zI`K}ST}M9toR znU1}37K4%pv*KkjW%Baah3W15KKu3PcN_9Urt_Tz*R-D7BSFQu<<$kNvm)@!|3)a! za+_1m3tA3nvpEcTj_!Ydf2_a6z5$|ylG(#f*~8?X4zSN7KkC!4m}lV6E)^=|oa$<4 zp5bmzkmA1*apv6amDC@Xw40A(7rbB-s73USqx%5=*;do>Y5tHE^Bz;s^bS0MiNRs{ z!K8^UQAbhIaP-N?zSaqkToLo`KWjo0_A&l3Zioh^E0R%l9drB|OqWQfomUZQL*DQ- zkNnWJ1H#fc0xYG&hJZNQ3t7tgxG;;)Z*9m+OxjqESxeDNS5O1O;P3MZ!#?zekHAv#+#hk&goZEfn7Jo@3s&dXc|$mnkYlh0o#yNffdAW#Bn!#ep!>i{X>;FF)l_%#(zxGe)axY zG%oZatm$f+!$H)0{tYD>JM^&ShEg5oL{l>;061+*v2^VuC2@`H301#DUD#%Io1;@%);g`uSN;f{O3+1 zvia^@qc~g+2J)AMM(}ENmfnho@J9tOoASEg1)LXh)OgqNnWhuKcHRQ9dA{uc`VFsVy) zxUn;)yCbjg^vEBCG-#9HvZ%DNJ@3xX#l8=!|Y2erlbxm&#qwBwVbPZf6zL)VlTbJ7f#E z&8JlQ!OK&%H^mGaU+NvJ;)%}3ZicvT-Iq+^8GnB-{Vs%5Z(9o$*7kr&Sh_csgJt8- zg$ET4yYd-NtF4zEoqPg-#br z?3FngDubH9m>FI6%4;p!EaTj`sn1$U%mabZx!=Z` zXZ}>d36LRv2M*FkF7;Sb<0JhZ1K#l0m0@6QH{;JR_~+1JZgURZ_EB#L`dNFQC3yCG zcG5Z3c6xC6`f}49eydiykoNR8@O|i>a9_<$i0T_EnJW(52S=L$m;fyjBA$>xR#$8{ zYg#7aFCcjF0mO&!6Vx@YXZE4^s0sPXr_X%L)YWBk96fMr|JLIZ@uBtgm!Cgf0Aq-D zAvAS$1e)p;+Z-sJT=kdQlHU2nzI!8rPlu9RSxanACN0k7HrKC} z@4C_=Xp#iZD(M2sQ7HM5eus{XV-BA7RIdk~1|9ok;e+dmYw?1pm81|!0TY_&5^=-E z!}p6jyE~#oHm@{9m^{s(O|Nza=vFtlyFD0~a(2?)OqPmQKE-_vfZX+L&k;IL47hhr zkI4-Nju)J%b5t}L#;N-C&mG+^onuEyVhCnQcA92w$x5pE!hWx+S9aR-ILdt>Xi)Gu|>~3tPc=k(WICn z#7SmYKGEy`C+aRww#q^tp&rB)SnNlx%EQBRHL}^DA%fA?f(fACFam8O`EzA)C*fqj z>Wf-3jCb2KSqdvB94nhG=zZii=zl1JU-!XWfU)nZuW{nP?+OS*imw5dWk~v15kTEM zOt9Vcpxe$;0xOB;cs9Tx>8h5`icZ}c@~#0lW^i$zQ*fbVu*^)M4rqiIO4;5&(_f+< zaYo~w*d0PTU-5iw9%);#?X+A&hKVdEIU0v0hszJU=^w(Lq#`{=l&_o_J;qNHt;Wrp!^u)-sUR0X=n)Zu1dZoyU@cO*whQ92gf~v{^3t&Q#9OCd zI_JD<{<9-hy~WwU8hZ)&4ogjmCP@w~Df~iXFUB7GW>=N}(M!0RjqT=DKm1X!^CJhq z-mX zKcUdx7G7?4?pZDCi03_1%yBpxF(ft-J|3Cy|1u{&_mcLAaxRFVir;t{NO}#wabXjXBLtcr^GRXhQdyY`VyiqM02T^74|IMk7TeB+ncQW3{vsvy; z_=)`E<|^4R7XSiHXIbA9;0@j=Vd8Qmo;%8u;)_SYYbo(YN|<#3_|vHqi@DL^P)&Vb%K=?nO6Il#y=p;DIq^FO+q*?S9e6eo6cmh!53#I4WHF-};+Mm^eS!V4= zeS}erUk;k!2)zd@Ya{9osdrB^r`;0Wk508O2 za5#ir#J{=(zvNM~bpjz5S6{|nu#%Z8nU^#fEGy+yW9$k7Z_r)tUf%ffoGdvF%k91z z4H^_SAPDS2Ap9zNFO|vee@&&sNTgvE4tHD{e`7S#VRxC$(p0DU^FAaYZ+vY|fwF=z zepc#A|JbhSURZpnZp3z4`yLH?5spez zz#bbA4*$Iw`&C6@XKpNmBc!W4>51NdGF=QQVqI3`5S1A<%H2x)tZI)6=+}kXm={0Q z>5&+K08#-7<3cFFGugL#0_bM|Q!e{`7zp17+ z{_e%g|2rLj(M7kjH6bOknp$M`?Oj-x^W*w;Si3rK1O~t^2t4R3dY59r2Fdl`y=fA_ zVmx66Xnuk#o%7n<3$7~>mrCqr^#Pasu)NT46u6{ou(N+`0|+Ri4W-0o(f`}Am5*H$Jq&(;OGU7yPyJi?rWN_M< zf?75x>ju;N#Jv!exCl>4m&`Oc}TKt)6f4a5K+QYP- zf^cO@>+}pW$`P?l%CFD6Xpf2=FW(yDe**@x2yWcrp6##3zEbDHQ(c{V>BBE3LoS%} zUD#=_0|KPzHU#e$Wa_$a+dBSy6Pc_S!gl-EyULEYDsj)AJfSk_E6P5jRd8mex?&E8 zJq&w&m)1ZW8}J-fN~QQq^Y*mvt|o9@2A3`SfffA1I{J#NA(B55()tE3GU{aOiLbPp z0ZI^mm0@{essH1b95;sUhg{@7p%NPkxaL|G-+putPPEMQP*voVG{r0m6o}3$d z$qol00A6-1^y)rOL0CM$X#>V$9tT!o^OJxXsO^);=FDrK9?h{DQ9wAf##qtZif)Ia zDC|il7;W~YTv{{=at@k`$NA=Zy#*GFeb&uUpQ)a6D^nr&&J0z0=NLAz`~<4+jCN7h z|MT7-HHUP=MFt#X=ZZ#XnFaJO6|n~2H40?n705mFH~BsGGVBy=ToIt85$?tl8S2He||6N&lv_t>XfhGv_~KU-DU zhRgvRKL>_0%4GsfmZC_u0K5st;gkbl&eO^zNT&hF6L>UQ_fmbyeG_n73_I2)?M`;pE=~46OtOcEy4M=elquiWfOa+& z>z7y`@>X9yFJ(O*bcR5@Q(!sBH~4#CEm_)|JjJHbhm!v0kV*l@(*aqdv6yc&*QB=d z-t&q>p8ONe)=fi){JHuUr^nWRjoc&<_zVmT2;EP!v6ndiMNX$Ei~@G&43XL?O4BJ( z)66$j9V-bu#9v>&)lmNOq&zUrK?|;nUrvkxP`X%X((X{zZJi9(7dC8TnB&jxcBgD} z>hvG_15%Bz-^X$|SQrO$jY6)*OKd!;2yC#s&x&rlT)|qIDr5KU-HntTJ#Yg-Mn z*$=uF1|2?^sJ3fDU043Qr2P3rf_wZ|X_Jd%GvWe>_q;B4K1_{ueT?7zR8x#dBn6v8 z1#^kEh(7RQ+%(BX&G7<8UypP()ks1@2Xy%w&0DCC`H=p;QU(;M?;0R&j2AuFZ zgx7UCt!8bf>pYv!BX{lMO1O#lMvE++j;FGGRzdx@*M-LZG^}d+d1rtvgNIiy{e;v= zd8#YUe&7@^$kdkJ=jJZ$JB9GZUh2O5;GQ2wA{2j3Y6}0mq0;eavk%9JBKXy<_sinSJJr+Wt@O`$p(kjX(;)KavseIB-*u=J5$*}k|5V^EC*+p<^%J2!-504 zfCcJ(>=KSeXkG|>9C{WRn3C6|@&|CaX!NmTLobH%ojX;`l%oaTf9QPWo*`Nm6-k~) z>Ps8EBLLfP3_FzcW3v8?VC>00dcjvGV>!;_UmZph8HhC`)ht+#iWkTm;;2|u80N0A zTxa+i%8a|dA8hj8bwe5q8k_`c0H?f|p3IWNXbB`edt-&+_7;xe#H)z>XBW!jy2KwDA73TUU{bF710_ z>?pvB9tPJQ?9F2>gS(FW^N1O%%JOV)SDv{MgMtS+gYT~q3k(BLK81(FHTI!Ak@)eW zP8XQM9J4OIs0&t!Hq)KFLCRdjKq#ri6s!Vw8VU&EggH$ERQNhze4>z%<2zv{8cB~f zrCC(u=Gj=T%Qc5Hb6}xhWEn0PMUC==P`){T*P?po`~46lphpl=XUDQG>?j}&S>XqL zC-+g%h68&ic8}2zlG_NN#d9Hh)Q9zt5#ljocUkMVAB!@IfBi%56I;%qE-~Aw3haqNiy{;yCJu*>?$m;wn!oh=?!9B=PZp?3fhH{5`e^S8G7 zB9~5bd$~Qweb)_MvCNl4bMi0kb7qt58bRiVbzDSh0H!G|5&e6*uBc=BLrl=c&XLg>qkG^lmA>J*YKl( zHc98q9&Wnmyc#ZQJ&w-eEFkfGhykv@HgX1y%7(T3{PaR68R1_LB(EaabNT(Px5x&4p^Z1}J)Rtqr=D z=NY$F4F)^h{ix8sHq*`d{KT~g;M|<=p5vDJzseASVqt2AErF(cc?qNKtRP>({aV`Y zBCDkRM=o49{2CjjfCiNM6|6~Y;^2NSj}oxrxy^CYh|;}UM*txis&&s2VixMAfXMpt zK{YiQf?CeQA2K0lYHwqFfE!@Vu*;e8l7ix67}ZLu0`r1EJz`MY2$QycsUk zUKSu6-;!D;&y34tRl2$z(-WUeU#!JV;q0DdGUcwptZ&jY^0jCL+CZ`$$P(5>%QfWMESfD z0|x(!x&9L6{*m~L&h@dP)=zRzrh;eqFZj>c;BE9ZHtd?@h8$FxjC>wsB*C#l3c@Bp z3kp8yyU^9;C`p0(2d(!P=YMzELjX7=O%3oAIEt15;#K)`2}`Bxbb8_FWNT6+b4!Xc zt4VJ`y0(44?agwx?bAWO+Y@$WW5lWrLgedsI!M7p~zB)_;7fJ;q9xl?jbqH|dgSJC9+JIcR zSyIcF%dMGLN0y!EOADv+0qR8)b%PI2yHB?W`m^sW_$P~vq6(iHiC7Mtfol5Lu%^$| zC@Rzjk`-Nfx`qptfyb`LW$>B^xk~fDVLWEj(FP^c`e-3Dxuha`?yE&r>r++g?ZX?WXFAjBXOZB$g{RGP-{&hgK1BuOzXV{7Wk7* zJT0u88+Z%rJV^JM3K5pBO6{!Q-xy)vy~8*=+nS^p>{$8h0LF400CdAMt}9o+xpe0k z{ICc;DqX<|h8Ys;`yP7kmTKqU=vaYEP?DIgMl;oghX~rv24y+Iu;5WG_ zIut3mK!p+Q({}k(vbQ&iJ9v9@3<4+l(|3DH?B|Iz&5hIY%i~Yaoo_38%=~N0>f3Licr(CB0P;GMwdS#iBaNU)_MPe zhOH(`xBCx-dS5pO9~JN1n$L7wNp09sv1B!p)H*A~?Sa3HyxH0%`%{LdNpFt|0K%_B z(!wP&%Z9S=$BmW;+0!#6kb=R^#Rhk^0SJnZIEG?};>7JUPSu|OFX;}!uKEsVNve9L zu%k(ru6ya$@ABP}_qOi_Z{O~d+!%d6{&o_W6}9ZTeDNE6SVtz532bQ7V0#TiiDs1a zX~D_0QmE&QS=!`_94Os*owpL2kE(5qI=>X-QwWRXg@KyF%190{r%_+=J>xfP(=>8N z0q#c};8gfl+zMKbHk$)ZI~x=7{KgVn3)S2`&iVFKiJu#=mrlX9xjh{$u!vEjyiUqu z#NwU*)o#75IP-=(dm}?>G|y{g-oxrA?(%2YPJL5i;?u_P!G4jVR44}^NJJTb0vwVI zZiL)FMZ`GI4pQFyVaJhR5&fTSZ&XdFHRrQbQ`Hm%%>oezjn^z|8AqQia~^ZG1j=VTYH)kb{?T|Nb9??JGu)KB85KIR zH&IfuF%k(UeG)sMw0X%sWc%rcQ<|{ZjBh@;hnT#~3krr`1kzD@QZEAUr(?(SB!3KR zVg~f-Bo98kyzs%HW0{PR*rjb9{Nw$BftUxyUy_Ayvv0QcNNrV1T>}4ol|@#01diFc zpER4w3Y?{&rNp_z`)0Zg3aC3nwgWd>%Y(Ur?o|bk99r5cD(N`c!hxu1kix{~+Wa-v z;nan8b{Ei7ne1Ey%%%M_+3^UlDB&Kryw=*@T;aZ)+IpwA{$@MhfMpEZLRzf^ce@%2?N4fM$!Y|X!K#b3H>Ec)VKXM z>NrDQCio#jt5(8j^grd3;PtNwQiHGBF8OVf|3(Udw*}2?DYn z4Nt%hLP@-kc7@5NeSerTyjbc#I`fgNo(OnmOFM^Ex#$?aJlLRlPWIFCgAy z>4_09+!)@me=$hmF}6_J&NY`5XuoNonOO)xfp(-;5VI~%Qt52#Za}!nD&dfo2Nd@t zZGledFcBu3k~cl}d%oOq=kmO&@tY%`$Ivr1`B1}+p<)3GdgXwQWhB7->Am^RRUd~iOg zv>qVD5vl-oEa((>Q5JU%ennSu#TA;R=kS^f`K#UHd3HFuHdUf-qV!pr#YS4#Jk*D{ zez0}=;7!4a-kNq2ruFQDUzz$TD|SLJq!n*EmUQw23jmv1`#o&5CcDvvplmV3SEKv2 z2~pGVZrVlvVCI3)h1L3v#L4IxRD?dc87sNtk!7_1O;|R(P0x@AO6IMTpB~R1R z9tydS#g?`wzHG^Fh5k_2 zGs(EL1jGa7ImT|wydr^L^Ft;))ztjNPzTDKtLhKBbL5vJ6>d`OKYZOIGRaj{{OeCT z3QZ}U7;=MKiWfDh`=WWzA1uFDLsb}5&4NKgg2|d?-OYkdgoD2QbP}A;Db0HyfK)_i z{deakgc6!)lsBt1l|Msav9wOXwA$VVDaSdSxs!#@BNK!?Bp?A6*zI!~iRWEF0`gCf zh#s5n$`s-C7J54-bvIe*c{0QwmAH?Moz=<00pCqo&Kovj)9+P6Zaa)50}1C& z<8g;kCWnX_ss>pU4W0I?2PA<}JJB37TcyxJen5*~gEbsz;dUi=JqEN4QY&dng3RvR z!Am7Ww(oglQ(`PA_#^q!I&Aj8?^!s?=xmk#H~FYKTz)T#1%=H5(~l+lLtUFN5u<3h zwcpi;{uG3iPCb**2GAmHTVsaee5!DADTgnlAeA-w_Y!0}uOQ~Gd9k^SBb35ZkeV=? zl9JfvOHSXR^VY33oRo3c>0INO-YwR=-gpNVr)opkBmbr%*4+l*$vttfD&7w{ zV?6Lym}J!j(-K7Ca|H-sWpB!+7q%wfsm)-dFQ8^rq~F6>pbgT50oOI1TWzyYM}8e1 z5g}$_sr|V-&;xzO9oCbh>L6bJQ z+Y%dRRpKR1K^;Wd)?xr;KQX54+S*G$KHa_QvV6*2rFa44ZeY^{&At2PsJ(LE;YOqa*T(Ijlm0QrQ5b@-alQ6O zH@);t#}whr+m!tvAQ8LO++8y-{vGXk5@Eky)g8fJqV&w8Bt-Im=;k!yPeW3SjPf0) zOqF{*{8<`7h&x5Hoad=hjvTpEZVL2$UrpKib0v7fs44 z3|ak_XS#2JOy@Z}egM?i|5g*?c)hypZkx5I1n*}S_@pt>%ajk*{jf@J3UM`VA3u8+XsIP%->ARFnD`&pC464-hM4vja#?!?jfSf0DmNr$Y`YU#8De5bCK+N`4-zl`dI;KjA+ z3~+>aXrbF}-*=*8u6cj)i$Jq()QBi%6p=%oxIpm6vrzi*Ip{k4AQN?7O+(b=iH_NP zba0+rf3FIFP$5#esYh5VB@JdSU^Cg0Hn_AQJzbljQ&JBO({+HVK(@jJ(x$il-d5O$ zf%rNvb*=sfJ$YY7LyEQi8sh_awF&}u+O81xk?(&v{fgmku!3X$Wd)$%-vDqVYaFdj zjSAaDBrAG?YBCkF@F==Czc1KJPw?-&x*`Siquet83KV0e_Gc(2S&L>OClffl00l?) zIsrO0An1)X zQlY4<1o9hiG3$b>QG_G=l^VOokN>W22x303-||XYdgECWzJd`3q%Prk6uaPo5AYH> zU!2<726Az+c}lHvRsIfeu}zN7)!lUn@FVTpZJ@_WKfk>ei#0nk9(W zOkhnK_+d~242h#Ss0Kg|b1QE|71$2_p$bnzQIFju)z2zT+XWL-1-<`3?8zGDPnR(3 zK;cfu)Mgz*Q69u7FKm9(Kh{3GS{2=CF$=E1>%_6LH|?E4GOX7ohlVv>jGjcbBShwd z-CSqPs2onPvIJE7kXu}n)9L}*LgWOKlB9VI<9>F@zVB>P>yW&TEj3F;BB`9y^5lUj z;ifWk`mn!_kJ>2{x=NG~$RHgj#4&WrGZbxU_?(jQDTu2E9BK-ZKnMCk(72_7wZvNJ zb`lY4GneJhQ}uE?fv!DqA`Fr*IfO3ay=LeQq`S;PVu5c~XLg5fSWI=j~u`(a@@9^o-0ZM=4|P&-@#qCf#{gXX(SEGJsd;j^eyTiQ*UdiIMcht?tQ|z z)7^@Qc>887Yg;-6N^AF)9CPJ90fqg)SNt`Gpqo@OeBS9Dm!$;{HAx|)24 zX8PQ>NYypY&8Q1o7l_Art@$1zgd!~>36ieKd3l2Tv`S_ANgmK+VfoCE!F4uCF(mp4 zOV6V_FwYCZf60*-pkA#nvKo=;JLc?#zlV67*G77nb;Sz%N(f|sxB;M*?g+|E^=8pf zEu*@iX6JIg{H}BUcCOv$>4DM`8{o8uHnd|Zgc*MJPE&0bw=lNs^3dm4!YB|J{;7i2;f`E<+rwtC;ZKdqVV ziR`&7=>Ah9m6^`m!6mqDzTddC2vR{}B7QAlXZgeiq}-fN)V;K>D%OdIVXySiE=VK=?klA8=G; zbKkPk0P`~#To=kIaau8j>Q&Ik%B}g`op}V+aeA!eApW>6!6Va4#Ak92Xaxo@Rh{jwYQLIi}<4Sy)$;MKHBd-u zFuXd0@(iV1Kj|PFFgF#Iqf80>2_Mwa@7L9yNiFq-RcMko#)oy>7~;U_%Oh3SVb%6S zw-*T3O+u>wlj|aF67K8%?xAkmn_obvAIrEO>4M5osSv4Ty=7J2?;RtpS7q z0Wj*NQ(UmkCu5P?XyJ$9d>G;)mv_)~s7w~PBuD-=!frg?gO zV69A@k(uuZ>v=pb4()P~$RTf^PBPW^aY>c`XUebJeuu#jhm4OBVwz(V26K!sa!O$2 z&Woc;j?1lzV~DhGSk>Bor66Y~9BH9h3YMN;GTq3T8p$Y`dj@Ho;+Ll%aiL-;Ny>ri zu%&ef)6ruPOl_v__QjWFEMG>QUnep=JbBKUhSd)^4&p(g-BOQM>w*A z%}4ItuvxG0yKUfMVu3a5z3?vKS1sZ^0Ja*|u0*T>8kGW{@rVI_q*1Qp9ApFMdKAzQ zyaknQ4hzv^c7!G|?J9y*HAh90|y$A9WyL&ZZhyv3HW9w?;2{lhf3D zk%U0Z&xhRZk?@~c6rFL9+8Cur%K4|1-*xPlIm~0Dx;+y*aC*%Z06zWCFTeV zvbr*?jla@rq9n$0{gYDE$95>u#xPyW1slaz7*wXt-hn;~9+2#XwY9Zj>|(YUR)=rG zu~%-gQs_r7@1LZTa{7|#zPn>vKD0o;T6GVq0Qx_wd}+0wm|!15YSrb+uE&9nfJx1Vuoi@<8XcyVf;L(UP_?Kd;?2Lc(gS6TGoFQdSayI9A}xK=_)cOC#Mg;fau17WN6U)yWWXIG=NG}vRf}�|WX z_D=!gcIRQ&R`H9m=zU%Fd=HN0JNE6> z3K__K3;)XP7{n9OWpV1L49bDfq>@#?vd)1H7Gh8z#s{u{A(ppUmt)ZI$h)E)G-{`4 zkp5nrJ#sOEdIV8SnsXUW3Y=ZgaTwNXS+4V-bfvQ|u7>qq5>ce@p*ILktF?~7azsGQ z^(8|jBrDE{0v-PhY421@$mbYhCXGbTLjhMGDw~z6hFOYL!m)>?+=y!{`p?D2S;n@( z>09P*fl~5nupv7I%}JMK7U2>EMnx+^Y^0@ssI<^CIR^!+lYnt;Uuw>!v_3p$ZxVykJ*Y_dBmu-ZHDu{k>(_^$YOU1c#v~&GBKfa|sR#>q+N(>!-d0M}zA!mCxg}k6 zMUv)3qP2etiU?T&K@oQFK)>VN6r|jk(zP~o1SLxUP=3MhNWbdamWRhCk{mk@$pkF+ z9JB>59>7GKD~;oV=zq#eu-6CAfY!zmK?0VaMkwtuKk;cYV=mB-{zhzPd1Dy!*ioPu zfF+dbA$xL`mFDCVk|?hK>4A#xvKVnUsNo8tl{~>~d^s;@rGDr@z{7uG;SZGcxj;M8MA#fUMkb`pe09B`S z!n?+8?V0dsaKr;yQCHG+@lZwx@`pS7|Ks-w`5 zV0|_GB@;>yR?+XLx*eJ49nA4l4*ApH0mpKUPG*QT3CF&KQl%g;3`|lfjyGAyX8&TM z*3%3y)q0)7)1Z>eyiZ$s1XFYsr&O8xu>=arFFf~AT!sm0e-1CSfWybC)5(uCL$1#C zpAKdb68K1zq=}hJ)ZGVN4a;9_%zg302sSGr*@qjV^Grr(QiTKTkLt_; zK&e(|rmm%A2HUxNa!)On9175ahqQ0$y96$>iQ8ROOV{Mu>zFbu(Lq|}1xcb)s7Yy@(6-8B)Gtqu!qlO)odjrQDbS;kG0l-LK09XqVu{9&!f2rV+r;8zAq; zPa%AqJDP;VbB6Lbhl4KYKt&-h^ZMWeTH*Uwa%C6#Qpx`7B`+vAqUivn*o2%h>2 zLzQHWpHerl|0HvVbQ6gMoC!~sfPWp!H5l;3PXUv1YvPgce$OpR2HAxa2*CrOuRz-V zSb>_1yrGSfRB*JQ3W~@vg9?F_XEqC30@>FVlb8EFlQD451LzZi65B;WE3Y%sGh3KIuyHiwL>|%{P+Q8%(ym9=DJ5SN7hMleQb& z%BDf7Kt?L|gFMK?gB?B0MC`%f-SbV_KWpPIS7#jx6|JH-sX8LAe*_41e#DEx zj&^T+JLuecqn+8@U(%@Z-eL3+r2Vae@+AqWF6PrIhx17#(G5M|0d=z&^e3R%K|aYA zq$kfnjCM%y5dR?sd`-p^0<5jtBvb$d*DrJXIz|+7;Jk*$7yx)^&Ru9K)qUIC8~hg3BzjbAD!s@Sf4)tKMh($PuHQ|M)0bQy+UGcvUE4S~=)SwH zde(gCZ+Cx2Gt!wa@<4RC=6}O+$rtgmuv5Y5zG=Fh@BqZkPm2}Pg z;-L1_%rYL3CyT650L}d{Y6y{uD!czeDxjrFBCn7>AwBf)L85SRFLd32NQAfXQpmu- zACItT-~Xrw@^P6czi|IT$`8Hmo@|J`@jU5UbY_G`t5`UQpF^R&kU3DORJuF>Y>hBNyKHA_Tbb%x zBy=ooFj-J@8N>%KRRmqg@HiC83PVs95d&|V45x4|tMwyDtD@)c@3+T%KWPctsy6^4 z=MJki9o$~Kxew1WigXp`+JU(~V5-ZmW==H0#O`aNpU&>Qu(#u#_;u~M6#&yaRmx%(E|vdY(xxPBmg14D3~Fp~Nm^@XNv zI-{hIBZLdO67b=FcsShzlaf28OZNHojR55)f0$qf^%2h(ao&;mBSFdTr4+J94`ffD zF^-H;WOMv9%60MAC;FQpdu=C@3lxVh8^?crY=cmzPj-zev2{(@XtuV^CP8R^YD z>z;8)8hU$4d4!XH+2l^tQOa>S2xSr)Xr{W1S&~Xd=M~%p@5fyCJ_#*iyB6c@0{Q{( z8E1QNAzeZtcS8b^lg{+aPTTaqn!q0Fj5P3N6F z7Cs2-mNV*f6XzD~w-?7im3=b7edl8l%NSQkSLTvW{uk^+wFPc(Z&hGLQV#^k0D8{I zNNVkW_Uzd==@af<{E@mv52Xj7!R&1ku_xdsd>AAp+z5`^HYz9|dlChump{T<84j%P zkg~IldUF+0-;)m~)Pd7CIa$j&=~~1#*74*7k`#br3ncjHgG-G>GSXvzaybSbhkW4z zH9aM38+F#1G|l|_WTW>N^wy;-6dZ-#Jb*Z%@uri}+6Z_vVWD(*3{tc{HQts+M#>Mt zuk<0kf=mSs%9o13JDOSE7G07`=*zzEZ{AmQ1Cbwt1j(*C`%W06Cpbv_&U&p>^kwNO zsYt&8E2O19a%x#X1pM@#x^nUO^?;#{$vLRk-4(-qXfXra9Iv*%9UY<32dzVTL(#^I zG3HyHW>063f^K37pMv9;k>K6^SzhIk#cWQCpYg}HQZgI^q*Dk`GKF|clVYR9&6z-! zNu_%zi8%Ue)3qQ=@?VYAp_xNm#lQQ{;oQ4+`6(j*T;g8#UD^eZ>91@9L-=|DPPg)k z&00wke&@kq-Gdp(D``etvIGCs7>+OVQn$9xkGdKSOa=b?CRo+x_89oMzp$5&X3;@s zk-VS?x(=qFiDZ$Xp3>JTE&#itE^N_r+@!f*wF_xGb8rAKlrk$?t&0N6?HOn(mG=1j zujUA`g{bkSeu<47*l*T?(`WE;QtSknVemlAzj8d%~||z$pTHRXz?(dYy>} zEng415lU_4g@gU22gm;^m#emF zkgSH&f^99>!|cWY^n)}2wO|0>+Ym6QXBd2VkALs`EGM2z-vckaCi3Pf$5k3n3CuTcFs9D^x7;z`p_8T0 z?f2{>(0Yzb$Ev$%A3sr+@<~hZ?%Hrry`ILmj5~1pEHss1? z2%|y&=ELGc(acfliZ>@M_mqdw4$gtlUH3(ZD9^L7)71-{B_8-2m~Y3C*9Hx^#iqUB zNl}hbW7y3KU`1;K+2e~%XW08@#Zy*~tp6ng@r*`;;YF94_$usW6J^CmN|uffT@LRe z#C}sa3z($AT7*bWx(jpMUgLmc(2BWz2-e3;(NgNN8j?VS@uB$S9sgBACrtI z34;kn&Dgr^_OOQ0+Q&M5K($Ws3_(=neaki}WTiV$0W9YeP% z;%>nvruXme;meo8xgZg}G$^fC>*sMA$`^)3f3b=Sai8aHgl%b?HD^I>9H?VJhG%5l zOKDd45opO#ikLz#+>lK8+Mddz`nhnm^pVrTVH9vm*;*XPP$pb=$ufm+wD?hJdfo+# z9k>6ii)m4L*?t!p;d{mn(8s=*1RjMqeS zj?k(Ng%rcVP?g>LhRgog3%%Zd8bV@`Pal|}cpP?@-%5P}I*e0u+!zQijq;cX3X>DD zOxGp~nh`A$l$^|d{-rX`6-IJ8LS=4|0f7(0mf2#lYkLPsg*qcJxD48Lv3*nrJ?G@e_?v>`}M=WlLR6S*rOOw-k+_Z*S!66(Jg7RuLknx_Cd1`X=f(_D~RS}IK%e%h+e;&>&>3p+nkVk9%tK3v{!TW z{+eYoN2hcl_3Fjd`S%EKb0>oPCzeee1x*W3-O)WaVYq>IAy3I-F^qaIU#^{tzcPta zj`7%9o3eKL5Zyd&%HtmO$GA65x&fIW5-KaKKC24d*D4eLA+ZpX^41R25fcg<321^% zVoOyIcpOd*7ZBGQzn_z#+4P41t^HrB)!e?b^J$~7D>`?T2*MEnNHq_<`lXcll*&jwHy`Wu@O8qI~rGo-b)V_}o=4BD34uc<0!_*~i z6#N~cH=07)k1mf^_U<0ugCcwU8w<^(_F70uc@Y{^4-4PazBIGDFki-u8ztGbKbb4o zg!^h|{`R7fXaGpUt)Lk@>G!kp{fKVALG~e<{=TTg8&zaf75WoyzG$#MBxs6Bi`S&( z=WAs_H^w;#@Ct{giOj8`ujB#$XwWn-y0dHV?J!k^t`02L!a$+>BsmCfbzf+CU05Ec zh5koL`8yWZ6K8h(xb=q%P%(}VVvC}KV7#LJ#=NstMKwQ-=<~8uyw5X%50>)W234R` zE^X6W1)46OEoixLG?s}+in>kEE}ezt72`N@I}SjADAr_)bAMqhRlTlwN{bk>s?B}Wb8!+Diw zPdL>+faMUOGJ;Ed>BN5L+nov=%)a!nK!>(9eA#{(ipnKG=jNo}=AFHlRC8KqCC7G4 z{vSP0MKdgIA(8R@VNOmCi013xLxAQi>?UN`ad?X?!RU>uQ>iD?f8%yUxr`H7vgnyO zs;QG{vNY%eAfCc-gEn@)+a$;zU$e{CWg>XRjH$$V0EUJWuWwBC4Hr86F>STHmFIbg z?Lnqvh?PN&_1iTdBBrjsllr2{$k!&K!k#C!9tWRRJwr9( zo0|N#o}Lj!@12NVP_iM-uJp{qdy1(EXi;N~gLYRU5%qYTZd<<9%jc)x{W9taxZ98v-}SB$<$rDp=$^S5xg~Up$YpH)1=CN<$R%dBdNC0W6py9@^oz#=^yYk|C@6LE+jq zZ~lNxodJ@OiuidnA`SiHmMFERgE0(4!IAvH_&%(TO%CjSY2I=76xcLTi3f$$B591ASIG_=PeS$zL`_drmTI5 zYI5)KBX{3EQr}@dbI>8qc1`PHEY(sII{6m8mVev7Ps<$kB0;6VJkNXjGhlBtD|anm4^#haZK|4)g40Ug`;I4^kTuE_A~(? zEUj5&hIgt$R(;hTxCL|RIBpMgoVp|lJkwQzr~xVgQyve*?BYo zG;e8z3XFFP&Zi)uR8dX6YV`qrafg-Gx8E5R^`|gR8f+t?mdc(*8>l(5TPa%8g*t4n zwVsu;3Lx%!5XwJZ>5Gd^NYnOXzlwnacEK2zqL-bP2^4?|7(~2mg zEUin-8yvHL7OA^^4Dies zV&85Xc}ku~m21HY-Kl(u`OBYok4F4S!-w-J;oXZF`^715LVRhL^b$nC>vfVKvC!=yQhkH^7Xc-+(gZNxtB75 zgI<ad_lU(ww4^QpaP1PF!JW%@a zUu2%pv+G7+M~@EyY%7!~cI~&1e)XaMCOk~zRZ-;FRCkoOo)b(mrwQmCU;`G!7%&m8 zK+~dihp509tR?gnVjFhkhEx8pct;HzOfVkkb z$m1zGjy9VeeP#vI!bjj=-qo(zf;fe{d|_o z(o^Smpx*gPe%6k-5jt@dX({M{h@l4O%6ZL$euOoL_UaX6tI$4Uk+6^1s_A;73Nu5K zz4vBSJ!76+n#e0C|@J7!u_+$}(h3zaJm%cKuB`fVcH(O*J;&l8jTNEVi zx#+y{Jc{zA3Q3=K)|SzRS5G}FC0vI(X7{F?e%52`wK6ZU_V0GIBn89o{Qu)sP zjLm`b%Tf&TLSMIpVhsArd8fbdW@5Fz;4ou_n z4(VWw)oBCHTx32eAYgT{zn3NZ+&8ev-`@rPbhTb&ci~p{AHa{oKp^)6mBC3RxE}HS zzi8HzvDWrVe}PVy&~^txPRqdmG#aT?D8x1id;DkdgxTKgNI;<(L`{-cve+{N+U@V0 zIDBH&A8W1j2%gi>a*UAM&;_~Qe(3zJIq$1Aav!?1}l zS-ov2xJ=u$+stLod=VluE+WwEYR@%OfuroST#y`qWAlj%#x*q5SxGS($&wPNVL)kVcp*A_^p97A zFD~emrhJ}b9V=noxoFbtUmvJbVSZ8j-PTFV|x*g1#O-^~=3AumT+08=S@w2_Dde zsENu_5L6H^4wq?yoWc8twa8$zaGEJYq8@TY4x{8?tJ=nMJ3e^#oY*pst#_av@!Fll z0X!RFBw^Rn)OXAZ%O;LG={^CY!01oLwz;A^cIuE`XbBZvTG|cy09Y7_=C791pCITQ z>3>Pqd{RN5wjN$Ds4|deAUp>pmgmEV#}%;3NM_Ix3YOFS8B2 zNlGvSS1#Funn)2F-g*TDhD}I1je(M#Cy-mYUX%Zo@?)AX9%VeRXNa41G{l6yW@~!= zV)`leBd;9C;ePeX_^N`Ti^N)wFT*OM&Hsu^H6+l$FA7m)+D7_!O`s~_N#!R`9M-j! zbFmH;t*Oq6AA@xrD6)g008`v=(;V9f7!`>x>t@tIDpEYphzERv!a8?3&-+ z4}#}B*8JJIjzkH~<=UO)c9O-xAEQX@ zukY|FcfEbbv~?DU5jPjkos;MAer&Q8AV`NuNQJbm6WKxj`**j2zJIn|y(O8+_5l~y z`(;xunWh?&OQOM}()P!W7@Ystj(Fw8g11gpovrd!6;~vB9VGr~qTGoY@hbyWZW;yB zzjP59?D9R0lF!l@I`7a3K4GyWO`dN5^Zp;BE54gDkfRXZcCXJU zV0!|ropCUy`gr8Ixy2)5Tjy<%UC9X;g>r-VK%2|@MH*f(c%E&Y1}WGI)j9J?kL!)U z#eCn?phYfAZF<)Kj&dpH9GThJg3S1q7g90t@i=t|ekr7BU3@Y9fxC#%!`)+VWjiLaldg@b_O0S+f(4dHx$PX; zoyFH{kjXjWs53)_ z7kM1K)?QHmXNEQg{^O&kK>>_H0z;SU4t#T$&i-qJmYxQC2$Hp>Taq@b7Do7*8*i@? zoVd7own5WPYJ|4U)s8nc9VWpO5}W3?3r4XqQcKzI$M$4+Jq1th*&>48m42p;Pi1*w z`-axH786r?ZR&Q{H2)!?{S>XR@2;Tq7w0K$j2tS!ky@B&zqn=y0_B};+v`adXi)*F zZyesCYtqx|9l0F%e0;MS6(-q#ptjGfHpJvPVUbh7%e!4N1fNRw1QC`_&#O(&0m7zO*h%nb%4O~$dnWRID#J+gmV%#5>o*vJQp zvT4>u04QC#U@CVCmTLh3!teG6t>x9EY#pA4T_WfHb8d$dVTf_gw6=pHKmhaWr2D;y z)#1pi5>E>^4}~nRsSpQ@V!m;Dmj%R_GOY|0;5u+p!WD#GOehi93R(`u^0VtQ6FhG& zEDH3P`VgCbV7@uuuj#$H+&KwWXTXl~W;?67Ufx+PYXJ5|0~UV-(YvJb&ZA@@13v~H z(z$Ed@g{&EUNJ_1PG9tG`kFuCx`{DF+D_(JHVvfIoO|tVkFk7YVo;T1^_A@TZBH>y zEl5|BB0u+=5O=g7N#@3ZA_!sopu%ad=Rhb8efb#|rBuW|hr)k9iGIZ%#qVyioF3O{ z3jFTRbR*z1^X$EBUaO@9AapVtV>iw)%Mqx|Yvx~Xasv>EJL=7#b2uQXsc^fUvEJd} z$rT*GQe1G6_|L13sLBx2okm{9p+`&H1MSek!eJI{Jg8?kezQqaJF6Cg}?@3>^Xk4K*1G)1ynL7l2#qG`;uowQ;>2j@fHLDA;d%O^=Y9d zW04F0Z;h zyyGPqD0A^Z&=GnK3;`R-t823H6ByBui4HwD76B;Y?4{y`k7ONkzvJ+lxo<*9ad*mh z*p*WiV;pZ6J2L_xqCw(Ov@&s$d{4kxniJ9y{7gBzV z=>H=GZ=;DV^>1_q(*XNQX0-U0>A$@lcm|1mMT*7IFR69Aci>EpcHl*w_p`hSA9h@@ z`dzq^1yhB@!c(VTt#yKfAY$5{q!B>&8|94WBgPzkri>m8PA}PCz6TzeZ%fu+l~bT= zAhsNscY+uZ+ei017odzoyjoZIuIMX2eBsHDc6pTiX~30N_na9_>T7#I(i`Pn+aQle z7Zohz^6@K7&jmCrUZy$&tEH`tgEYaafCmyJWoq;kpqtP40w%V=w?{mp?6( z@l$NKJ=-JDzZ4C=gA{2=$1}e0?EX6TI`BQZl;r;O0_3j@TvOLHCQ-UG$Svr3WpQ3U z+~`}%K>Z27>({+s5`*3_9-vDwiZD-uJ4mbj%6Uw>(Fz*P^yjG}Y=2Xr1dgJZ`jLPY zWdV^{TPRMbtL#<|QcCe<$$hp$)E)6m%o_Xz+G;)3rrK6rdPYX}7i0Dz;jD94K6&V( z_vss>l7UjHZ?u+p13GdFm)W$h9Gzdg`ZUl$=JOmS8GKOgxYMtWqF>I;+jEA(El>EFHmm*c;OjVdppKf z7Mbeyw$Qu~IkU(8;1&dgVtkTm8a+bl!1iu0Gu>l$d<&c*Gei~*jtwNSV3P$w1qBNVh4 zp5TEgG-D^Qcj$KNMhs;9@Ap}E-?$lzS*z%`Cq^>%QRM$kJJpeX2L<|pMIqyfhN*GVkYTv>BBAQ(dsd#{oILg;s zseqLy>MbS2y*1~2iy3R!rG1F+&*_9ZFV82lvJZ0WggSJ`gAbttxl$`@4t|&&SoL(n zIUjmuYV@_kB?Z7UC@kZ&>0xU(KBHuPGsK?+GC?=>O;nX;RTq+goZ8l0zatT(a6r(T zQ<#+qHeuv&6RhwnSj`OOb{54h8S-BG& z#&EmsaN*OwpxJ9IvG{@${Z5c=OI-!isfdqPt-+`RJAnMby8Id#ym062Ee-taYKthe zA6}e90L7l;fpUZ}IIG8<6G(A`&o=^<|F?q*59c&}dOsG{OF7xjG0F^RE4qNR%%48t7OJESiYKEXXsmx`w#cUcC&-}DHXN#0i|_m3UD8l(DFw{Y(5o@4E5BY)&+x%empoZ2LCJIt_eQa@R#djA5#4dbFry#d_!PW4x8ZFmyHB z4}{oxc$3|p9y=;M;W(~`D&xTNJOml_{pp8XKD=59RE)1mrVr1w!IO8;_@Lr}ouf#v zk_0Owp^L;wS+>2(kq5;p-)>VyUpeJ@oBGY8(PgQH6twUy*9o6>ZMwHWGKCTGriBej z6gh8>=#p~jGNLSRnd=$T)b`=}tU#v-opiD5;P>`D`>^@7U55e&_qBrQLU>>W)6&qT zGW8Rq8=v`ZewKdvF{JTkP<5g^TYZ>Rd~(zyuSjAO^B{3!DFL^2d$ocm*klZw&*x%} zAZc9>hH~yAVHZ;Bx;n`Gk%iQgEmm2LLtnS7w0aJY;x0r6R!EH$*yOABW!*P)vj|aK zJM^Vlo1-G(Qi;t;BU+_se6p<9ZL%wD^ei0l;iS36noLC$5#0rCO4lX0yExG|3)y_m zf{C+8E)UM-I;I1+^N>wRLR`fqja^aaOcj@A`!8Eoqvz+k=<2uR(XkBGvZK>K7CV$e za=69WLq|4TUsv%`E`x=UMWql`A?76BSH*S{tVPjBcl23Ky;){|$kM}md+#H8Y^k3x z0r8>(YQyGK&J-VsQ48k-;qTS_&4x7Z{C2;qU821CioRR(ml93=+{`@_#-a(Pfb%4# zrrIE9vpIf##m!cT5Q6?{A4J7bZ8D^0`i-LNDnpXNTy!fazXyUxUSF5%jFc&LpKUVh zt*6~=F@I%n@b4?1=!BC z{_IMA8Z}iWe!_@EG4AknoUhc?SgP_S$1f$^CmuB>y@8A7wsA^asjtV5(+wADY#+$D z3D0I|bgS4W+1@v!yEp?e=nzo;;t68Z0noR9OK;I>jurOdUlNg>6Gqy<-W_^|k{S3Z+s+DXGp_FV9lw!nG|KeaGs61d{mLWK_eM3mrpCiZ^)zQfi?zR$f0m%DE_nJw zmRLRYC7P=C!P=zym*`t5U;`#v2_;~s0e69!EO}?cr;r;gq%m~|`!e`efhxKn*Vhi& z#bNr(nr~Wa&yVS6nIl)Y`HV;vAYtk^39@(E0*H+3hsPYEVUuME>-#+8=d*_D&B-k*9<67 z82&uAxb5BkNQ@&?-`9WzH^O~o^k(Rf)6Ehh8|%4|7Y3bVv&VL(y!S!L94)ttrLFFG zgE0f5$B_1fDZ?Pu@G3;_l#{o~NBN_xEA?hKZZXWtHt zA3QXZQwoRh@btar@Qu}I0ouQjihX2 z=a~^-%Zug(N)}pyA%B;-Xv7{Mn4rQXC@VDwb3?pkWK{HUKojWHz{p3 zW+X0lpM-n+@lv$DmpmZQlMiThs4fSiQ+(Fiia~_Kn%T5HJQNK{ramIzI+K3HU(W0c z?tnFs;_0}kW)d5ottZ_?yeXbHJa@mpH%`GD7_CzLbUYNM;^>cqc?p*2)XxfagJ8j@RjF*P4@ zYn)3KzIH&wtJdn~ky8UUCcax!lTS5;>_T;%2v;Eef}YjUR3O=VK9^)X?nL}@k{j?+ zM;-?>TzqSBrDpj&NdoAE&Dth>o_K1*j&i{*>R)gvzu=2+Pkq=8&nr>+=D^J=n=1wj znHQmp>L*lHx`|u23Nlxm%*lBCp;c5{1Dz11+w-Pz#7mNnlV3p~GcGWGXdI4skUSG|8~27cqv@$BL>0`4OZPJ z4O8G0E3kN6s*|aMtb$u~`waF`+@LGAl_3oaeLLlb$HuJ?JQ(zCilpY- z@`7tz$iw;*#M8fH?C=F&3o6E?@zMn?Oa>QMc}n_0h%NMpr}ZePzF`hB#|>uxZ{-5K=g72Eopy3-&eQmX)+=M z54z8RqvGD4>1IKG1!1*S)9ZFciSd1q#if>u{ApjuUNo$i(VcR6GZ|bAb5h@fPV7eq ztmX;ytY?Ul53sTguxlnf)RQPjxzL$L0vvw&qB-|EjBIvz9!sc3Yxf)0YiW%@l#Iox zbZ%A@=&j7FRf=XFa0|>>IBE+{MsS|pG2DIjfspYU6)%s_$Ih>5>aA!OCpxMJZwKUx8s_6xR*3|3jN~TVcYVlGv4`ev#cr#&T8Rqr0vj6a{1FS zCW5tP$vAMqOSjY+zE$#HjfS019G`o>@(Ggj(|p+@F=N6cl$86XD*M*X;xL>N$sD;>TDz?eYl4T!sSq*+ItIF)_w3%r{AG;Q1=uwfVs*S9 zy0yN&h4!0w8M27ya1gj}_2uDQ+Lwn)IGu}omxVk@#(hRExBqLIKhKtm7bHlJN4AbX zi$8ec1EnU)yG8$Q1zcT-LhRvJ!U$68b z=JQ;4Vit`4?;G$1_y<;?LrfL_`${%5c%{#X>i>VVfBp~w{|8rXm-N9E*Ed&xNx8%E P+J`h$bd(Df%>(}j9FF8? literal 0 HcmV?d00001 diff --git a/images/cover.svg b/images/cover.svg new file mode 100644 index 000000000..646bb796a --- /dev/null +++ b/images/cover.svg @@ -0,0 +1 @@ +−4−20246800.050.10.150.20.250.30.350.4 \ No newline at end of file diff --git a/index.Rmd b/index.Rmd index 553916d0c..f6ddee86d 100644 --- a/index.Rmd +++ b/index.Rmd @@ -1,5 +1,6 @@ --- title: "现代应用统计与 R 语言" +subtitle: "Modern Applied Statistics with R" author: "黄湘云" date: "`r Sys.Date()`" site: bookdown::bookdown_site @@ -44,6 +45,7 @@ keywords: - R 语言 subject: "现代应用统计与 R 语言" description: "线性模型理论及其应用,注意模型的适用范围、参数估计方法、模型检验和诊断,理论和应用并重,同时附以真实的案例分析。将线性模型、广义线性模型、广义可加模型、线性混合效应模型、广义线性混合效应模型和广义可加混合效应模型融合到同一框架下。应用层面,要考虑数据集的平衡问题、缺失问题和异常问题。应用场景包括环境污染、流行病学和风险控制等领域。" +cover-image: images/cover.png --- @@ -57,7 +59,7 @@ description: "线性模型理论及其应用,注意模型的适用范围、参 Book in early development. Planned release in 202X. ::: -```{r normal-dist,fig.cap="统计学的世界",eval=knitr::is_html_output(), echo=FALSE} +```{r normal-dist, fig.cap="统计学的世界", eval=FALSE, echo=FALSE} x = seq(from = -4, to = 8, length.out = 193) y1 = dnorm(x, mean = 3, sd = 1) y2 = dnorm(x, mean = 2, sd = 1.5) @@ -89,45 +91,41 @@ vline <- function(x = 0, color = "red") { plotly::plot_ly( x = x, y = y1, type = "scatter", mode = "lines", - fill = "tozeroy", fillcolor = "rgba(92, 184, 92, 0.2)", + fill = "tozeroy", fillcolor = "rgba(102, 102, 102, 0.2)", text = ~ paste0( "x:", x, "
", "y:", round(y1, 3), "
" ), hoverinfo = "text", - name = "N(3, 1)", - # name = plotly::TeX("\\mathcal{N}(3,1^2)"), - line = list(shape = "spline", color = "#5CB85C") + name = plotly::TeX("\\mathcal{N}(3,1^2)"), + line = list(shape = "spline", color = "#666666") ) %>% plotly::add_trace( x = x, y = y2, type = "scatter", mode = "lines", - fill = "tozeroy", fillcolor = "rgba(91, 192, 222, 0.2)", + fill = "tozeroy", fillcolor = "rgba(153, 153, 153, 0.2)", text = ~ paste0( "x:", x, "
", "y:", round(y2, 3), "
" ), hoverinfo = "text", - name = "N(2, 2.25)", - # name = plotly::TeX("\\mathcal{N}(2, 1.5^2)"), - line = list(shape = "spline", color = "#5BC0DE") + name = plotly::TeX("\\mathcal{N}(2, 1.5^2)"), + line = list(shape = "spline", color = "#999999") ) %>% plotly::layout( xaxis = list( showgrid = F, - title = "x" - # title = plotly::TeX("x") + title = plotly::TeX("x") ), yaxis = list( showgrid = F, - title = "f(x)" - # title = plotly::TeX("f(x)") + title = plotly::TeX("f(x)") ), legend = list(x = 0.8, y = 1, orientation = "v") ) %>% plotly::config( staticPlot = TRUE, - # mathjax = "cdn", + mathjax = "cdn", displayModeBar = FALSE ) ``` @@ -149,7 +147,7 @@ plotly::plot_ly( Bradley Efron 在他的课程中谈及现代统计的研究层次,第一层次是基于正态分布假设的,这种类型已经研究的很清楚了,往往可以得到精确的结果,第二层次是将正态分布推广到指数族,这种类型的也研究的比较多了,常见的情况都研究的比较清楚,罕见的情况也是大量存在的,特别是在实际应用当中,总的来说只能得到部分精确的结果,第三层次对分布没有任何限定,只要满足成为一个统计分布的条件,这种情况下就只能求助于一般的数学工具和渐进理论。 -```{r stats-level, opts.label='fig.small', fig.cap="(ref:stats-level)", fig.scap="(ref:stats-level-s)", echo=FALSE} +```{r stats-level, fig.cap="(ref:stats-level)", fig.scap="(ref:stats-level-s)", fig.width=4, fig.height=4, echo=FALSE} par(mar = rep(0.5, 4)) plot(c(0, 10), c(0, 10), ann = F, axes = F, type = "n") symbols(c(5, 5, 5), c(2.8, 4.0, 4.8), @@ -176,46 +174,6 @@ box() 下面以区间估计为例,希望能为传道做一点事情。区间估计的意义是解决点估计可靠性问题,它用置信系数解决了对估计结果的信心问题,弥补了点估计的不足。置信系数是最大的置信水平。 -1934 年 C. J. Clopper 和 E. S. Pearson 给出二项分布 $B(n, p)$ 参数 $p$ 的置信带 [@Test_1934_binom],图 \@ref(fig:confidence-belt) 提炼了文章的主要结果。 - -```{r confidence-belt,fig.cap="(ref:confidence-belt)",fig.scap="(ref:confidence-belt-s)",echo=FALSE,fig.width=5,fig.height=5} -library(rootSolve) # uniroot.all -options(digits = 4) -# r 为上分位点 -p_fun <- function(p, r = 9) qbinom(0.025, size = 10, prob = p, lower.tail = F) - r # 上分位点 -l_fun <- function(p, r = 9) qbinom(0.025, size = 10, prob = p, lower.tail = T) - r # 下分位点 - -# 计算每个分位点对应的最小的概率 p -p <- sapply(0:10, function(x) min(uniroot.all(p_fun, lower = 0, upper = 1, r = x))) - -# 计算每个分位点对应的最大的概率 l -l <- sapply(0:10, function(x) max(uniroot.all(l_fun, lower = 0, upper = 1, r = x))) - -plot( - x = seq(from = 0, to = 10, length.out = 11), y = seq(from = 0, to = 1, length.out = 11), - type = "n", xlab = "x", ylab = "p", panel.first = grid() -) -lines(x = 0:10, y = p, type = "s") # 朝下的阶梯线 -lines(x = 0:10, y = p, type = "l") # 折线 -points(x = 0:10, y = p, pch = 16, cex = .8) # 散点 - -abline(a = 0, b = 0.1, col = "gray", lwd = 2, lty = 2) # 添加对称线 -points(x = 5, y = 0.5, col = "black", pch = 16) # 中心对称点 -points(x = 5, y = 0.5, col = "black", pch = 3) # 中心对称点 - -lines(x = 0:10, y = l, type = "S") # 朝上的阶梯线 -lines(x = 0:10, y = l, type = "l") # 折线 -points(x = 0:10, y = l, pch = 16, cex = .8) # 散点 - -points(x = c(2, 2), y = c(0.03, 0.55), pch = 8, col = "black") -text(x = 2, y = 0.55, labels = "p2", pos = 1) -text(x = 2, y = 0.03, labels = "p1", pos = 3) -``` - -(ref:confidence-belt) 给定置信系数 $1- \alpha = 0.95$ 和样本量 $n = 10$ 的情况下,二项分布参数 $p$ 的置信带。样本量为 10,正面朝上的次数为 2,置信水平为 0.95 的情况下,参数 $p$ 的精确区间估计为 $(p_1, p_2) = (0.03, 0.55)$。 - -(ref:confidence-belt-s) 二项分布参数 $p$ 的置信带 - 区间半径这么长,区间估计的意义何在?增加样本量可以使得半径更短,那么至少应该有多少样本量才可以让估计变得有意义呢?就是说用估计比不用估计更好呢?答案是 39 个,留给读者思考一下为什么?读者可能已经注意到,置信带是关于点 $(5, 0.5)$ 中心对称的,这又是为什么,并且两头窄中间胖,像个酒桶? ::: {.rmdtip data-latex="{提示}"} @@ -266,7 +224,6 @@ ggplot(data.frame(x = c(0, 1)), aes(x)) + 覆盖概率 $P_{\theta}(X = x)$ 和参数真值 $\theta$ 的关系 [@Lawrence2001;@Geyer2005] -比如总体为二项分布 $B(n, \theta)$ 其中 n = 10,在置信水平 $\alpha = 0.95$ 下,问参数 $\theta$ 的覆盖概率是多少?随参数 $\theta$ 的变化情况如何? 以抛硬币为例,我来做这个实验,抛10次,7 次正面朝上,他做这个实验,也抛10 次,4 次正面朝上。换不同的人来做这个实验,结果会有所不同,实际上,总共有 2^10 = 1024 个结果,每个结果都可以用来估计未知的参数 $p$ 及其置信区间、覆盖概率。假设参数 $p$ 的真值是 0.7,抛10次,6 次正面朝上。先来模拟这个抛硬币的过程,重复抛了10次,是 10 重伯努利实验,正面朝上的累计次数服从二项分布 $B(10, 0.7)$,从此二项分布里抽样一次,相当于抛10次硬币,R 提供了模拟二项分布的函数 `rbinom()`,实现起来非常方便,为了模拟过程可重复,设置随机数种子为 2019。 @@ -336,32 +293,24 @@ text(2, 0.35, "$\\frac{1}{\\sqrt{2\\pi}}$", cex = 2) ``` -两个二元正态分布的碰撞,点的密度估计值代表概率密度值, - -```{r faithful-dens, fig.cap="散点图:faithful 数据集", fig.width=5, fig.height=5, echo=FALSE} -plot(faithful, - pch = 20, panel.first = grid(), cex = 1, - col = densCols(faithful, colramp = terrain.colors) -) -``` [统计检验,决策风险,显著性水平]{.todo} -```{r ab-test,fig.asp="A/B 测试:功效", fig.width=5, fig.height=4, echo=FALSE} +```{r ab-test, fig.asp="A/B 测试:功效", fig.width=5, fig.height=4, echo=FALSE} ggplot(data.frame(x = c(-4, 10)), aes(x)) + stat_function( fun = dnorm, geom = "area", args = list(mean = 2, sd = 1), - fill = "gray70", alpha = .8 + fill = "gray80", alpha = .8 ) + stat_function( fun = dnorm, geom = "area", args = list(mean = 4, sd = 2), - fill = "gray50", alpha = .8 + fill = "gray40", alpha = .8 ) + - geom_vline(xintercept = 4, col = "gray40", lty = 2, lwd = 1.2) + - geom_vline(xintercept = 2, col = "gray40", lty = 2, lwd = 1.2) + - theme_minimal(base_size = 16) + geom_vline(xintercept = 4, col = "gray40", lty = 2, lwd = 1) + + geom_vline(xintercept = 2, col = "gray40", lty = 2, lwd = 1) + + theme_void() ``` Charles J. Geyer 的文章 Fuzzy and Randomized Confidence Intervals and P-Values [@Geyer2005] 文章中的图 1 名义覆盖概率的计算见 [@Blyth1960] @@ -387,7 +336,7 @@ Charles J. Geyer 的文章 Fuzzy and Randomized Confidence Intervals and P-Value 第 \@ref(chap-data-visualization) 章介绍数据可视化,分四个部分,基础元素、常用图形、字体和颜色设置。 -第 \@ref(chap-dynamic-documents) 章介绍动态文档,即 R Markdown 及其生态系统。 +第 \@ref(chap-document-elements) 章介绍动态文档,即 R Markdown 及其生态系统。 第 \@ref(chap-interactive-web-graphics) 章介绍交互图形,以常用的 **plotly** 和 **highcharter** 为主,重点介绍 R 和 JavaScript 库的对应关系。 @@ -440,14 +389,6 @@ knitr::include_graphics(path = "images/cc-by-nc-nd.png") ## 运行信息 {#sec-session-welcome .unnumbered} -书籍在 `r R.version.string` 下编译,Pandoc 版本 2.14.1,最新一次编译发生在 `r Sys.time()`。 +书籍在 `r R.version.string` 下编译,Pandoc 版本 2.14.1,最新一次编译发生在 `r format(Sys.time(), tz = "Asia/Shanghai")`。 -```{r} -xfun::session_info(packages = c( - "knitr", "rmarkdown", "bookdown", "equatiomatic", - "data.table", "DT", "kableExtra", "reactable", - "patchwork", "plotly", "shiny", - "ggplot2", "dplyr", "tidyverse" -), dependencies = FALSE) -``` diff --git a/interactive-web-graphics.Rmd b/interactive-web-graphics.Rmd index 3dc35ffb5..721064d2d 100644 --- a/interactive-web-graphics.Rmd +++ b/interactive-web-graphics.Rmd @@ -6,6 +6,8 @@ knitr::opts_chunk$set(python.reticulate = TRUE) reticulate::use_virtualenv(virtualenv = Sys.getenv("RETICULATE_PYTHON_ENV"), required = TRUE) ``` + + ```{r,include=FALSE} options(warn = -1) library(ggplot2) @@ -13,6 +15,9 @@ library(plotly, warn.conflicts = FALSE) library(highcharter) library(dygraphs) library(r2d3) + +# library(scatterD3) +# library(dygraphs) # plot_ly <- plotly::plot_ly # add_trace <- plotly::add_trace # @@ -43,6 +48,7 @@ Paul C. Bauer 的书 [Applied Data Visualization](https://bookdown.org/paul/appl [ECharts2Shiny](https://github.com/XD-DENG/ECharts2Shiny) 包将 ECharts 嵌入 shiny 框架中。 + [timevis](https://github.com/daattali/timevis) 创建交互式的时间线的时序可视化,它基于 [Vis](https://visjs.org/) 的 [vis-timeline](https://github.com/visjs/vis-timeline) 模块,支持 shiny 集成。[dygraphs](https://github.com/rstudio/dygraphs) 包基于 [dygraphs](https://github.com/danvk/dygraphs) 可视化库,将时序数据可视化,更多情况见 。[leaflet](https://github.com/rstudio/leaflet) 提供 [leaflet](https://leafletjs.com/) 的 R 接口。[rAmCharts4](https://github.com/stla/rAmCharts4) 基于 [amCharts 4](https://github.com/amcharts/amcharts4/) 库, [apexcharter](https://github.com/dreamRs/apexcharter) 提供 [apexcharts.js](https://github.com/apexcharts/apexcharts.js) 的 R 接口。还有 [billboarder](https://github.com/dreamRs/billboarder) 等。更完整地,请看 Etienne Bacher 维护的 R 包列表 [r-js-adaptation](https://github.com/etiennebacher/r-js-adaptation) 。 @@ -53,7 +59,7 @@ Paul C. Bauer 的书 [Applied Data Visualization](https://bookdown.org/paul/appl 学习 [plotly](https://github.com/ropensci/plotly) 和 [highcharter](https://github.com/jbkunst/highcharter) 为代表的 基于 JavaScript 的 R 包,共有四重境界:第一重是照着帮助文档的示例,示例有啥我们做啥;第二重是明白帮助文档中 R 函数和 JavaScript 函数的对应关系,能力达到 JS 库的功能边界;第三重是深度自定义一些扩展性的 JS 功能,放飞自我;第四重是重新造轮子,为所欲为。下面的介绍希望能帮助读者到达第二重境界。 ::: -[plotly](https://github.com/ropensci/plotly) 是一个功能非常强大的绘制交互式图形的 R 包,支持图片下载、背景图片[^plotly-logo]、工具栏[^plotly-toolbar]和注释[^plotly-annotation] 等一系列细节的自定义控制。下面结合 JavaScript 库 [plotly.js](https://github.com/plotly/plotly.js) 一起介绍,帮助文档 `?config` 没有太详细地介绍,所以我们看看 `config()` 函数中参数 `...` 和 JS 库 [plot_config.js](https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js) 中的功能函数是怎么对应的。图`r if(knitr::is_html_output()) "\\@ref(fig:custom-details)"` 中图片下载按钮对应 `toImageButtonOptions` 参数, 看 [toImageButtonOptions](https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js#L311) 源代码,可知 它接受任意数据类型,对应到 R 里面就是列表。 `watermark` 和 `displaylogo` 都是传递布尔值(TRUE/FALSE),具体根据 JS 代码中的 valType (参数值类型)决定,其它参数类似。另一个函数 [layout](https://plot.ly/r/reference/#Layout_and_layout_style_objects) 和函数 `config()` 是类似的,怎么传递参数值是根据 JS 代码来的。 +[plotly](https://github.com/ropensci/plotly) 是一个功能非常强大的绘制交互式图形的 R 包。它支持下载图片、添加水印、自定义背景图片、工具栏和注释[^plotly-annotation] 等一系列细节的自定义控制。下面结合 JavaScript 库 [plotly.js](https://github.com/plotly/plotly.js) 一起介绍,帮助文档 `?config` 没有太详细地介绍,所以我们看看 `config()` 函数中参数 `...` 和 JavaScript 库 [plot_config.js](https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js) 中的功能函数是怎么对应的。图`r if(knitr::is_html_output()) "\\@ref(fig:custom-details)"` 中图片下载按钮对应 `toImageButtonOptions` 参数, 看 [toImageButtonOptions](https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js#L311) 源代码,可知,它接受任意数据类型,对应到 R 里面就是列表。 `watermark` 和 `displaylogo` 都是传递布尔值(TRUE/FALSE),具体根据 JavaScript 代码中的 valType (参数值类型)决定,其它参数类似。另一个函数 [layout](https://plot.ly/r/reference/#Layout_and_layout_style_objects) 和函数 `config()` 是类似的,怎么传递参数值是根据 JavaScript 代码来的。 ```js toImageButtonOptions: { @@ -81,11 +87,10 @@ watermark: { ``` -[^plotly-logo]: -[^plotly-toolbar]: + [^plotly-annotation]: -```{r custom-details,fig.cap="自定义细节",eval=knitr::is_html_output()} +```{r custom-details, fig.cap="自定义细节", eval=knitr::is_html_output()} library(plotly, warn.conflicts = FALSE) plot_ly(diamonds, x = ~clarity, y = ~price, @@ -93,80 +98,69 @@ plot_ly(diamonds, ) %>% config( toImageButtonOptions = list( - format = "svg", filename = paste("plot", Sys.Date(), sep = "_"), - width = 450, height = 300 - # 设置下载图片的尺寸 https://github.com/ropensci/plotly/issues/1556#issuecomment-505833092 - ), # 还可设置为 PNG 格式,可用 rsvg 的 rsvg_pdf 函数转化为 PDF - modeBarButtons = list(list("toImage")), # 保留下载按钮 - # 完整的列表见 https://github.com/plotly/plotly.js/blob/master/src/components/modebar/buttons.js - watermark = F, - displaylogo = FALSE, # 移除 Plotly 的 logo - locale = "zh-CN", # 汉化 - # staticPlot = TRUE, # 静态图形而不是交互图形 - # modeBarButtonsToRemove = c( - # "zoom2d", "zoomIn2d", "zoomOut2d", "autoScale2d", "resetScale2d", "pan2d", - # "hoverClosestCartesian", "hoverCompareCartesian", "toggleSpikelines" - # ), # 去掉任意一个按钮 - # displayModeBar = FALSE, # 去掉整个顶部工具栏 - showLink = FALSE + format = "svg", width = 450, height = 300, + filename = paste("plot", Sys.Date(), sep = "_") + ), + modeBarButtons = list(list("toImage")), + watermark = FALSE, + displaylogo = FALSE, + locale = "zh-CN", + staticPlot = TRUE, + showLink = FALSE, + modeBarButtonsToRemove = c( + "hoverClosestCartesian", "hoverCompareCartesian", + "zoom2d", "zoomIn2d", "zoomOut2d", + "autoScale2d", "resetScale2d", "pan2d", + "toggleSpikelines" + ) ) %>% layout( + template = "plotly_dark", images = list( source = "https://images.plot.ly/language-icons/api-home/r-logo.png", xref = "paper", yref = "paper", - x = 1.0, + x = 1.00, y = 0.25, sizex = 0.2, sizey = 0.2, opacity = 0.5 ), annotations = list( - text = "watermark", # 文本注释 + text = "DRAFT", # 水印文本 + textangle = -30, # 逆时针旋转 30 度 font = list( - size = 40, # 字号 - color = "red", # 颜色 - family = "Times New Roman" # 字族 + size = 40, # 字号 + color = "gray", # 颜色 + family = "Times New Roman" # 字族 ), - opacity = 0.2, # 字体透明度 + opacity = 0.2, # 透明度 xref = "paper", yref = "paper", x = 0.5, y = 0.5, - showarrow = FALSE # 去掉箭头指示 + showarrow = FALSE # 去掉箭头指示 ) ) ``` -函数 `ggplotly()` 将 ggplot 对象转化为交互式 plotly 对象 +Table: (\#tab:plotly-config) 交互图形的设置函数 `config()` 各个参数及其作用(部分) -```{r} -gg <- ggplot(faithful, aes(x = eruptions, y = waiting)) + - stat_density_2d(aes(fill = ..level..), geom = "polygon") + - xlim(1, 6) + - ylim(40, 100) -``` +| 参数 | 作用 | +| :--------------- | :---------------------------------------------- | +| displayModeBar | 是否显示交互图形上的工具条,默认显示 `TRUE`[^plotly-toolbar]。 | +| modeBarButtons | 工具条上保留的工具,如下载 `"toImage"`,缩放 `"zoom2d"`[^modeBarButtons]。| +| modeBarButtonsToRemove | 工具条上要移除的工具,如下载和缩放图片 `c("toImage", "zoom2d")`。 | +| toImageButtonOptions | 工具条上下载图片的选项设置,包括名称、类型、尺寸等。[^toImageButtonOptions]| +| displaylogo | 是否交显示互图形上 Plotly 的图标,默认显示 `TRUE`[^plotly-logo]。 | +| staticPlot | 是否将交互图形转为静态图形,默认 `FALSE`。 | +| locale | 本土化语言设置,比如 `"zh-CN"` 表示中文。 | -静态图形 - -```{r} -gg -``` - -转化为 plotly 对象 - -```{r,eval=knitr::is_html_output()} -ggplotly(gg) -``` - -添加动态点的注释,比如点横纵坐标、坐标文本,整个注释标签的样式(如背景色) - -```{r,eval=knitr::is_html_output()} -ggplotly(gg, dynamicTicks = "y") %>% - style(., hoveron = "points", hoverinfo = "x+y+text", - hoverlabel = list(bgcolor = "white")) -``` +[^plotly-logo]: 。 +[^plotly-toolbar]: 。 +[^modeBarButtons]: 完整的列表见 。 +[^toImageButtonOptions]: 设置下载图片的尺寸,还可设置为 PNG 格式,SVG 格式图片,可借助 **rsvg** 的 `rsvg_pdf()` 函数转化为 PDF 格式 。 ## 散点图 {#sec-plotly-scatter} @@ -188,6 +182,13 @@ Table: (\#tab:plotly-scatter-functions) 散点图类型 plotly.js 提供很多图层用于绘制各类图形 +```{r plotly-scatterplot,fig.cap="其它常见图形",eval=knitr::is_html_output()} +# 折线图 +plot_ly(Orange, + x = ~age, y = ~circumference, color = ~Tree, + type = "scatter", mode = "markers" +) +``` ## 条形图 {#sec-plotly-barplot} @@ -238,9 +239,9 @@ htmltools::tagList(p11, p12, p13, p14) 其它常见的图形还要折线图、直方图、箱线图和提琴图 -```{r plotly-lineplot,fig.cap="其它常见图形",eval=knitr::is_html_output()} +```{r plotly-lineplot, fig.cap="折线图", eval=knitr::is_html_output()} # 折线图 -p21 <- plot_ly(Orange, +plot_ly(Orange, x = ~age, y = ~circumference, color = ~Tree, type = "scatter", mode = "markers+lines" ) @@ -265,10 +266,10 @@ dat <- data.frame( `hoverinfo = "text"` 表示 tooltips 使用指定的 text 映射,而 `visible = "legendonly"` 表示图层默认隐藏不展示,只在图例里显示,有时候很多条线,默认只是展示几条而已。举例如下 -```{r multiple-axes,fig.cap="双轴图",eval=knitr::is_html_output()} +```{r multiple-axes, fig.cap="双轴图", eval=knitr::is_html_output()} plot_ly(data = dat) %>% add_bars( - x = ~dt, y = ~search_qv, color = I("#4285f4"), name = "搜索 QV", + x = ~dt, y = ~search_qv, color = I("gray80"), name = "搜索 QV", text = ~ paste0( "日期:", dt, "
", "点击 QV:", format(valid_click_qv, big.mark = ","), "
", @@ -278,7 +279,7 @@ plot_ly(data = dat) %>% hoverinfo = "text" ) %>% add_bars( - x = ~dt, y = ~valid_click_qv, color = I("#FBBC05"), name = "点击 QV", + x = ~dt, y = ~valid_click_qv, color = I("gray60"), name = "点击 QV", text = ~ paste0( "日期:", dt, "
", "点击 QV:", format(valid_click_qv, big.mark = ","), "
", @@ -288,10 +289,10 @@ plot_ly(data = dat) %>% hoverinfo = "text" ) %>% add_lines( - x = ~dt, y = ~qv_ctr, name = "QV_CTR", yaxis = "y2", color = I("#34A853"), + x = ~dt, y = ~qv_ctr, name = "QV_CTR", yaxis = "y2", color = I("gray40"), text = ~ paste("QV_CTR:", scales::percent(qv_ctr, accuracy = 0.01), "
"), hoverinfo = "text", - line = list(shape = "spline", color = "Set1", width = 3, dash = "line") + line = list(shape = "spline", width = 3, dash = "line") ) %>% layout( title = "", @@ -318,100 +319,41 @@ plot_ly(data = dat) %>% ## 直方图 {#sec-plotly-histogram} -```{r plotly-histogram,fig.cap="其它常见图形",eval=knitr::is_html_output()} -# 分组直方图 -p22 <- plot_ly(iris, - x = ~Sepal.Length, +```{r plotly-histogram, fig.cap="分组直方图", eval=knitr::is_html_output()} +plot_ly(iris, + x = ~Sepal.Length, colors = "Greys", color = ~Species, type = "histogram" ) ``` ## 箱线图 {#sec-plotly-boxplot} -```{r plotly-boxplot,fig.cap="其它常见图形",eval=knitr::is_html_output()} +```{r plotly-boxplot, fig.cap="箱线图", eval=knitr::is_html_output()} # 箱线图 -p23 <- plot_ly(diamonds, - x = ~clarity, y = ~price, +plot_ly(diamonds, + x = ~clarity, y = ~price, colors = "Greys", color = ~clarity, type = "box" ) - -# 箱线图 -plot_ly(diamonds, x = ~cut, y = ~price) %>% - add_boxplot() ``` -```{r,eval=FALSE} -# 不同的类别使用不同的颜色上色 -plot_ly(diamonds, x = ~clarity, y = ~price, color = ~clarity) %>% - add_boxplot() - -# 使用 colors 参数设置调色板 -plot_ly(diamonds, - x = ~clarity, y = ~price, - color = ~clarity, colors = "Set1" -) %>% - add_boxplot() - -# 或者使用 qplot 式绘图风格 -plot_ly(diamonds, - x = ~clarity, y = ~price, - color = ~clarity, colors = "Set1", type = "box" -) - -# 分组箱线图 https://github.com/ropensci/plotly/issues/994 -plot_ly(diamonds, - x = ~cut, y = ~price, - color = ~clarity, type = "box" -) %>% - layout(boxmode = "group") - -# 修改图例的标题,R 的嵌套 list 对象对应于 JS 的 JSON 数据对象 -plot_ly(diamonds, - x = ~cut, y = ~price, - color = ~clarity, colors = "Set1", type = "box" -) %>% - layout( - boxmode = "group", - legend = list( - bgcolor = "white", - title = list(text = "clarity") - ) - ) - -# 提琴图 -plot_ly(diamonds, x = ~cut, y = ~price) %>% - add_trace(type = "violin") +## 提琴图 {#sec-plotly-violin} -plot_ly(diamonds, - x = ~cut, y = ~price, split = ~cut, type = "violin", +```{r plotly-violin, fig.cap="提琴图", eval=knitr::is_html_output()} +plot_ly(sleep, + x = ~group, y = ~extra, split = ~group, + type = "violin", box = list(visible = T), meanline = list(visible = T) -) %>% - layout( - xaxis = list(title = "Cut"), - yaxis = list(title = "Price", zeroline = F) - ) -``` - -## 提琴图 {#sec-plotly-violin} - -```{r plotly-violin,fig.cap="其它常见图形",eval=knitr::is_html_output()} -# 提琴图 -p24 <- plot_ly(sleep, - x = ~group, y = ~extra, split = ~group, type = "violin", - box = list(visible = T), - meanline = list(visible = T) ) - -htmltools::tagList(p21, p22, p23, p24) ``` plotly 包含图层 27 种,见表 \@ref(tab:add-layer) -```{r add-layer,echo=FALSE} +```{r add-layer, echo=FALSE} library(plotly, warn.conflicts = FALSE) -knitr::kable(matrix(grep("add_*", x = ls("package:plotly"), value = T), ncol = 3), - col.names = c("A", "B", "C"), caption = "图层") +knitr::kable(matrix(grep("add_*", x = ls("package:plotly"), value = T), ncol = 3), + col.names = c("A", "B", "C"), caption = "图层" +) ``` @@ -420,7 +362,7 @@ knitr::kable(matrix(grep("add_*", x = ls("package:plotly"), value = T), ncol = 3 简单图形 scatter,分布图几类,其中 scatter、heatmap、scatterpolar 支持 WebGL 绘图引擎 -```{r plotly-bubble,fig.cap="气泡图",eval=knitr::is_html_output()} +```{r plotly-bubble, fig.cap="气泡图", eval=knitr::is_html_output()} # https://plotly.com/r/bubble-charts/ dat <- diamonds[, .( carat = mean(carat), @@ -429,7 +371,7 @@ dat <- diamonds[, .( ), by = .(cut)] plot_ly( - data = dat, + data = dat, colors = "Greys", x = ~carat, y = ~price, color = ~cut, size = ~cnt, type = "scatter", mode = "markers", marker = list( @@ -455,10 +397,11 @@ plot_ly( ## 曲线图 {#sec-plotly-spline} -```{r plotly-spline,fig.cap="平滑曲线图",eval=knitr::is_html_output()} +```{r plotly-spline, fig.cap="平滑曲线图", eval=knitr::is_html_output()} plot_ly( - x = c(1, 2.2, 3), y = c(5.3, 6, 7), type = "scatter", - mode = "markers+lines", line = list(shape = "spline"), color = I("#EA4335") + x = c(1, 2.2, 3), y = c(5.3, 6, 7), + type = "scatter", color = I("gray40"), + mode = "markers+lines", line = list(shape = "spline") ) %>% add_annotations( x = 2, y = 6, size = I(100), @@ -477,7 +420,7 @@ plot_ly( ```{r plotly-tozeroy,eval=knitr::is_html_output()} plot_ly( data = PlantGrowth, y = ~weight, - color = ~group, + color = ~group, colors = "Greys", type = "scatter", line = list(shape = "spline"), mode = "lines", fill = "tozeroy" ) @@ -488,9 +431,8 @@ plot_ly( 其他基础图形 -```{r plotly-heatmap,eval=knitr::is_html_output()} -# Heatmaps -plot_ly(z = volcano, type = 'heatmap') +```{r plotly-heatmap, eval=knitr::is_html_output()} +plot_ly(z = volcano, type = 'heatmap', colors = "Greys") ``` @@ -503,7 +445,7 @@ plot_ly(z = volcano, type = 'heatmap') ```{r mapbox-quakes,eval=knitr::is_html_output(),fig.cap="斐济地震数据"} data("quakes") plot_mapbox( - data = quakes, + data = quakes, colors = "Greys", lon = ~long, lat = ~lat, color = ~mag, size = 2, type = "scattermapbox", @@ -526,7 +468,7 @@ plot_mapbox( ) ``` -```{r bubble-map,eval=knitr::is_html_output(),fig.cap="斐济地震带分布",echo=TRUE} +```{r bubble-map, eval=knitr::is_html_output(), fig.cap="斐济地震带分布"} plot_ly( data = quakes, lon = ~long, lat = ~lat, @@ -536,7 +478,7 @@ plot_ly( "震级:", mag ), marker = list( - color = ~mag, + color = ~mag, size = 10, opacity = 0.8, line = list(color = "white", width = 1) ) @@ -567,13 +509,13 @@ plot_ly( ``` -```{r choropleth-map,eval=knitr::is_html_output(),fig.cap="美国各州收入"} +```{r choropleth-map, eval=knitr::is_html_output(), fig.cap="美国各州收入"} dat = data.frame(state.x77, stats = rownames(state.x77), stats_abbr = state.abb) plot_ly(data = dat, type = "choropleth", locations = ~stats_abbr, locationmode = "USA-states", - colorscale = "Viridis", + colorscale = "Greys", z = ~Income ) %>% layout(geo = list(scope = "usa")) @@ -657,7 +599,7 @@ knitr::include_graphics(path = "screenshots/rasterly-rides.png") ``` -## 三维图 {#sec-plotly-3d} +## 三维图 (plotly) {#sec-plotly-3d} ```{r plotly-3d,fig.cap="三维图形", eval=knitr::is_html_output()} @@ -707,7 +649,7 @@ plot_ly(data = df) %>% y = ~y, yend = ~y, color = ~resource, mode = "lines", - colors = "Set2", + colors = "Greys", line = list(width = 20), showlegend = F, hoverinfo = "text", @@ -782,8 +724,14 @@ dat <- dat[order(-dat$count), ] %>% dat$complaint <- reorder(x = dat$complaint, X = dat$count, FUN = function(x) 1/(1 + x)) plot_ly(data = dat) %>% - add_bars(x = ~complaint, y = ~count, showlegend = F, color = I("#4285f4")) %>% - add_lines(x = ~complaint, y = ~cumulative, yaxis = "y2", showlegend = F) %>% + add_bars( + x = ~complaint, y = ~count, + showlegend = F, color = I("gray60") + ) %>% + add_lines( + x = ~complaint, y = ~cumulative, yaxis = "y2", + showlegend = F, color = I("gray40") + ) %>% layout( yaxis2 = list( tickfont = list(color = "black"), @@ -834,12 +782,14 @@ dat <- data.frame( value = c(39, 27.4, 20.6, 11, 2) ) %>% transform(percent = value / cumsum(value)) + plot_ly(data = dat) %>% add_trace( type = "funnel", y = ~category, x = ~value, - color = ~category, + color = ~category, + colors = "Set2", text = ~ paste0(value, "
", sprintf("%.2f%%", 100*percent)) , hoverinfo = "text", showlegend = FALSE @@ -876,12 +826,12 @@ plot_ly( type = "scatterpolar", mode = "markers", fill = "toself" ) %>% add_trace( - r = c(39, 28, 8, 7, 28, 39), + r = c(39, 28, 8, 7, 28, 39), color = I("gray40"), theta = c("数学", "物理", "化学", "英语", "生物", "数学"), name = "学生 A" ) %>% add_trace( - r = c(1.5, 10, 39, 31, 15, 1.5), + r = c(1.5, 10, 39, 31, 15, 1.5), color = I("gray80"), theta = c("数学", "物理", "化学", "英语", "生物", "数学"), name = "学生 B" ) %>% @@ -900,7 +850,7 @@ plot_ly( 盈亏图 -```{r waterfall,fig.cap="瀑布图", eval=knitr::is_html_output()} +```{r waterfall, fig.cap="瀑布图", eval=knitr::is_html_output()} library(plotly) library(dplyr) @@ -932,7 +882,7 @@ dat[nrow(dat), "text"] <- "累计" plotly::plot_ly(dat, x = ~x, y = ~y, measure = ~measure, type = "waterfall", text = ~text, textposition = "outside", - name = "收支", hoverinfo = "final", + name = "收支", hoverinfo = "final", connector = list(line = list(color = "gray")), increasing = list(marker = list(color = "#66C2A5")), decreasing = list(marker = list(color = "#FC8D62")), @@ -959,25 +909,31 @@ plotly 绘制 treemap 和 sunburst 图比较复杂,接口不友好, [plotme] ## 调色板 {#sec-plotly-color-palette} -```{r,eval=knitr::is_html_output(),warning=FALSE} +```{r, eval=knitr::is_html_output(), warning=FALSE} plot_ly(iris, x = ~Petal.Length, y = ~Petal.Width, mode = "markers", type = "scatter", color = ~ Sepal.Length > 6, colors = c("#132B43", "#56B1F7") ) -plot_ly(iris, x = ~Petal.Length, y = ~Petal.Width, color = ~Sepal.Length>6, - mode = "markers", type = "scatter") +plot_ly(iris, + x = ~Petal.Length, y = ~Petal.Width, color = ~ Sepal.Length > 6, + mode = "markers", type = "scatter" +) -plot_ly(iris, x = ~Petal.Length, y = ~Petal.Width, color = ~Sepal.Length>6, - mode = "markers", type = "scatter", colors = "Set2") +plot_ly(iris, + x = ~Petal.Length, y = ~Petal.Width, color = ~ Sepal.Length > 6, + mode = "markers", type = "scatter", colors = "Set2" +) -plot_ly(iris, x = ~Petal.Length, y = ~Petal.Width, color = ~Sepal.Length>6, - mode = "markers", type = "scatter", colors = "Set1") +plot_ly(iris, + x = ~Petal.Length, y = ~Petal.Width, color = ~ Sepal.Length > 6, + mode = "markers", type = "scatter", colors = "Set1" +) ``` 构造 20 个类别 超出 Set1 调色板的范围,会触发警告说 Set1 没有那么多色块,但还是返回足够多的色块,也可以使用 `viridis`、`plasma`、`magma` 或 `inferno` 调色板 -```{r plotly-colorpalette,fig.cap="调色板",eval=knitr::is_html_output()} +```{r plotly-colorpalette, fig.cap="调色板", eval=knitr::is_html_output()} dat <- data.frame( dt = rep(seq( from = as.Date("2021-01-01"), @@ -988,17 +944,17 @@ dat <- data.frame( ) # viridis plot_ly(dat, - x = ~dt, y = ~qv, color = ~bu, + x = ~dt, y = ~qv, color = ~bu, mode = "markers", type = "scatter", colors = "viridis" ) ``` -## 面积图 {#sec-highcharter} +## 堆积图 (highcharter) {#sec-highcharter} Joshua Kunst 在他的博客里 补充了很多数据可视化案例,另一个关键的参考资料是 [highcharts API 文档](https://api.highcharts.com/highcharts/),文档主要分两部分全局选项 `Highcharts.setOptions` 和绘图函数 `Highcharts.chart`。下面以 `data_to_boxplot()` 为例解析 R 中的数据结构是如何和 highcharts 的 JSON 以及绘图函数对应的。 -```{r mapping-data,eval=knitr::is_html_output(),fig.cap="两种从数据到图形的映射方式"} +```{r mapping-data, eval=knitr::is_html_output(), fig.cap="两种从数据到图形的映射方式"} library(highcharter) highchart() %>% hc_xAxis(type = "category") %>% @@ -1121,7 +1077,7 @@ Highcharts.chart('container', { 对应到 R 包 **highcharter** 中,绘图代码如下: -```{r hc-area,eval=knitr::is_html_output(),fig.cap="1940年至2017年美国和俄罗斯核武器数量变化"} +```{r hc-area, eval=knitr::is_html_output(), fig.cap="1940年至2017年美国和俄罗斯核武器数量变化"} library(highcharter) options(highcharter.theme = hc_theme_hcrt(tooltip = list(valueDecimals = 2))) @@ -1186,7 +1142,7 @@ methods(hchart) 更多 API 细节描述见 。 桑基图描述能量的流动 [^sankey] -```{r,eval=knitr::is_html_output(),fig.cap="桑基图"} +```{r, eval=knitr::is_html_output(), fig.cap="桑基图"} library(jsonlite) # 转化为 JSON 格式的字符串 dat <- toJSON(data.frame( @@ -1236,193 +1192,6 @@ hchart(sleep, "line", hcaes(ID, extra, group = group)) ::: -## 动画 I {#sec-highcharter-animation} - -动态条形图 - -```{r,eval=FALSE} -library(highcharter) # highcharter 的依赖也很重 -library(idbr) -library(purrr) -library(dplyr) # 未来替代一下 - -# the US Census Bureau International Data Base API -# 美国人口普查局国际数据库 API -idb_api_key("35f116582d5a89d11a47c7ffbfc2ba309133f09d") -yrs <- seq(1980, 2030, by = 5) - -df <- map_dfr(c("male", "female"), function(sex) { - transform(get_idb("US", yrs, sex = sex), sex_label = sex) -}) - -df <- df %>% - transform(population = pop * ifelse(sex_label == "male", -1, 1)) - -# 数据变换 -series <- df %>% - group_by(sex_label, age) %>% - do(data = list(sequence = .$population)) %>% - ungroup() %>% - group_by(sex_label) %>% - do(data = .$data) %>% - mutate(name = sex_label) %>% - list_parse() - -maxpop <- max(abs(df$population)) - -xaxis <- list( - categories = sort(unique(df$age)), - reversed = FALSE, tickInterval = 5, - labels = list(step = 5) -) - -highchart() %>% - hc_chart(type = "bar") %>% - hc_motion( - enabled = TRUE, - labels = yrs, - series = c(0, 1), - autoplay = TRUE, - updateInterval = 10, - playIcon = "fa fa-play", - pauseIcon = "fa fa-pause" - ) %>% - hc_add_series_list(series) %>% - hc_plotOptions( - series = list(stacking = "normal"), - bar = list(groupPadding = 0, pointPadding = 0, borderWidth = 0) - ) %>% - hc_tooltip( - shared = FALSE, - formatter = JS(" - function() { - return '' + this.series.name + - ', age ' + this.point.category + - '
' + 'Population: ' + - Highcharts.numberFormat(Math.abs(this.point.y), 0); - } - ") - ) %>% - hc_yAxis( - labels = list( - formatter = JS(" - function() { - return Math.abs(this.value) / 1000000 + 'M'; - } - ") - ), - tickInterval = 0.5e6, - min = -maxpop, - max = maxpop - ) %>% - hc_xAxis( - xaxis, - rlist::list.merge(xaxis, list(opposite = TRUE, linkedTo = 0)) - ) -``` - -动态气泡图 - -```{r,eval=FALSE} -highchart() %>% - hc_xAxis(min = 0, max = 10) %>% - hc_yAxis(min = 0, max = 10) %>% - hc_motion(enabled = TRUE) %>% - hc_add_series( - type = "bubble", - data = list( - list( - sequence = list( - list(x = 1, y = 1, z = 10), - list(x = 2, y = 3, z = 5), - list(x = 3, y = 5, z = 8) - ) - ) - ) - ) -``` - - -```{r,eval=FALSE} -highchart() %>% - hc_xAxis(min = 0, max = 10) %>% - hc_yAxis(min = 0, max = 10) %>% - hc_add_series( - type = "bubble", - name = "气泡图", - data = list( - list(x = 1, y = 1, z = 10) - ) - ) -``` - -动态散点图 - -```{r,eval=FALSE} -library(highcharter) - -highchart() %>% - hc_chart(type = "scatter") %>% - hc_yAxis(max = 6, min = 0) %>% - hc_xAxis(max = 6, min = 0) %>% - hc_add_series( - name = "Australia", - data = list( - list(sequence = list(c(1, 1), c(2, 2), c(3, 3), c(4, 4))) - ) - ) %>% - hc_add_series( - name = "United States", - data = list( - list(sequence = list(c(0, 0), c(3, 2), c(4, 3), c(4, 1))) - ) - ) %>% - hc_add_series( - name = "China", - data = list( - list(sequence = list(c(3, 2), c(2, 2), c(1, 1), c(2, 5))) - ) - ) %>% - hc_motion( - enabled = TRUE, - labels = 2000:2003, - series = c(0, 1, 2) - ) -``` - -动态柱状图 - -```{r,eval=FALSE} -highchart() %>% - hc_chart(type = "column") %>% - hc_yAxis(max = 6, min = 0) %>% - hc_add_series(name = "A", data = c(2, 3, 4), zIndex = -10) %>% - hc_add_series( - name = "B", - data = list( - list(sequence = c(1, 2, 3, 4)), - list(sequence = c(3, 2, 1, 3)), - list(sequence = c(2, 5, 4, 3)) - ) - ) %>% - hc_add_series( - name = "C", - data = list( - list(sequence = c(3, 2, 1, 3)), - list(sequence = c(2, 5, 4, 3)), - list(sequence = c(1, 2, 3, 4)) - ) - ) %>% - hc_motion( - enabled = TRUE, - labels = 2000:2003, - series = c(1, 2), - playIcon = "fa fa-play", - pauseIcon = "fa fa-pause" - ) -``` - - ## 时序图 {#sec-dygraphs} [dygraphs](https://github.com/rstudio/dygraphs) 专门用来绘制交互式时间序列图形,下面以美团股价为例,展示时间窗口筛选、坐标轴名称、刻度标签、注释、事件标注、缩放等功能 @@ -1463,7 +1232,7 @@ dygraph(meituan[, "3690.HK.Adjusted"], main = "美团股价走势") |> dyUnzoom() ``` -## 图形导出 {#sec-export} +## 导出静态图形 {#sec-export} orca (Open-source Report Creator App) 软件针对 plotly.js 库渲染的图形具有很强的导出功能,[安装 orca](https://github.com/plotly/orca#installation) 后,`plotly::orca()` 函数可以将基于 htmlwidgets 的 plotly 图形对象导出为 PNG、PDF 和 SVG 等格式的高质量静态图片。 @@ -1472,38 +1241,41 @@ p <- plot_ly(x = 1:10, y = 1:10, color = 1:10) orca(p, "plot.svg") ``` +## 静态图形转交互图形 {#sec-ggplotly} -## 地图 II {#sec-echarts4r-map} +函数 `ggplotly()` 将 ggplot 对象转化为交互式 plotly 对象 -```{r,include=FALSE} -library(echarts4r) -library(leaflet) -library(leafletCN) -library(maptools) -library(leaflet.extras) +```{r} +gg <- ggplot(faithful, aes(x = eruptions, y = waiting)) + + stat_density_2d(aes(fill = ..level..), geom = "polygon") + + xlim(1, 6) + + ylim(40, 100) ``` -相比于 **plotly**,**echarts4r** 更加轻量,这得益于 JavaScript 库 [Apache ECharts](https://github.com/apache/echarts)。 -前者 MIT 协议,后者采用 Apache-2.0 协议,都可以商用。Apache ECharts 是 Apache 旗下顶级开源项目,由百度前端技术团队贡献,中文文档也比较全,学习起来门槛会低一些。 +静态图形 -```{r fiji-quakes-echarts4r, fig.cap="斐济地震带", eval=knitr::is_html_output()} -library(echarts4r) -quakes |> - e_charts(long) |> - e_geo( - roam = TRUE, - boundingCoords = list( - c(185, - 10), - c(165, -40) - ) - ) |> - e_scatter( - lat, mag, - coord_system = "geo" - ) |> - e_visual_map(mag, scale = e_scale) +```{r} +gg +``` + +转化为 plotly 对象 + +```{r,eval=knitr::is_html_output()} +ggplotly(gg) +``` + +添加动态点的注释,比如点横纵坐标、坐标文本,整个注释标签的样式(如背景色) + +```{r,eval=knitr::is_html_output()} +ggplotly(gg, dynamicTicks = "y") %>% + style(., hoveron = "points", hoverinfo = "x+y+text", + hoverlabel = list(bgcolor = "white")) ``` + + +## 地图 II {#sec-echarts4r-map} + **leaflet** 包制作地图,斐济是太平洋上的一个岛国,处于板块交界处,经常发生地震,如下图所示,展示 1964 年来 1000 次震级大于 4 级的地震活动。 ```{r fiji-quakes-html, fig.cap="斐济地震带", eval=FALSE} @@ -1559,7 +1331,7 @@ knitr::include_graphics(path = "screenshots/leaflet-heatmap.png") **leafletCN** 提供汉化 -```{r,eval=FALSE,echo=TRUE} +```{r, eval=FALSE, echo=TRUE} # 地图默认放大倍数 zoom <- 4 # 地图可以放大的倍数区间 @@ -1608,10 +1380,10 @@ leaflet(quakes) |> ``` -## 动画 II {#sec-echarts4r-animation} +## 动画 {#sec-echarts4r-animation} -```{r echarts4r-animation-gapminder,eval=knitr::is_html_output()} +```{r echarts4r-animation-gapminder, eval=knitr::is_html_output()} # https://d.cosx.org/d/422311 library(purrr) library(echarts4r) @@ -1697,14 +1469,36 @@ mtcars |> ")) ``` +## 三维图 (rgl) {#sec-rgl-3d} + +[ggrgl](https://github.com/coolbutuseless/ggrgl) + +```{r rgl-3d, fig.cap="三维世界地图", fig.width=4, fig.height=4, eval=knitr::is_html_output()} +library(rgl) +lat <- matrix(seq(90, -90, len = 50) * pi / 180, 50, 50, byrow = TRUE) +long <- matrix(seq(-180, 180, len = 50) * pi / 180, 50, 50) +r <- 6378.1 # radius of Earth in km +x <- r * cos(lat) * cos(long) +y <- r * cos(lat) * sin(long) +z <- r * sin(lat) +# 调整视角 +rgl.viewpoint( theta = 0, phi = 15, fov = 60, zoom = 0.5, interactive = TRUE) + +persp3d(x, y, z, + col = "white", xlab = "", ylab = "", zlab = "", + texture = system.file("textures/world.png", package = "rgl"), + specular = "black", axes = FALSE, box = FALSE, + normal_x = x, normal_y = y, normal_z = z +) +``` ## 网络图 {#sec-network-analysis} [gephi](https://github.com/gephi/gephi) 探索和可视化网络图 GraphViz ```{r} -library(igraph) +# library(igraph) ``` ### networkD3 {#subsec-networkD3} diff --git a/linear-models.Rmd b/linear-models.Rmd index 0c41fd90b..7a0ce6699 100644 --- a/linear-models.Rmd +++ b/linear-models.Rmd @@ -1,6 +1,7 @@ # 线性模型 {#chap-linear-models} ```{r,include=FALSE} +source("_common.R") library(magrittr) library(ggplot2) library(gganimate) @@ -171,25 +172,7 @@ Regression Deletion Diagnostics `?influence.measures` -```{r anscombe-base,fig.cap="模型诊断很重要", fig.process=embed_math_fonts, dev = ifelse(knitr::is_html_output(), 'svg', ifelse(knitr::is_latex_output(), 'pdf', 'png')), fig.asp=1} -library(extrafont) # 注册字体 CM Roman 到 PDF 设备 -data(anscombe) -form <- paste(paste0("y", seq(4)), paste0("x", seq(4)), sep = "~") # form <- sprintf('y%d ~ x%d', 1:4, 1:4) -fit <- lapply(form, lm, data = anscombe) -par(mfrow = c(2, 2), mar = 0.1 + c(4, 4, 1, 1), oma = c(0, 0, 2, 0), family = "CM Roman") -for (i in 1:4) { - plot(as.formula(form[i]), - data = anscombe, col = "black", - pch = 19, cex = 1.2, - xlim = c(3, 19), ylim = c(3, 13), - xlab = as.expression(substitute(bold(x[i]), list(i = i))), - ylab = as.expression(substitute(bold(y[i]), list(i = i))) - ) - abline(fit[[i]], col = "red", lwd = 2) - text(7, 12, bquote(bold(R)^2 == .(round(summary(fit[[i]])$r.squared, 3)))) -} -mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.2) -``` + ```{r anscombe,fig.cap="线性模型可能在欺骗你",fig.asp=1} library(ggplot2) library(patchwork) @@ -238,7 +221,13 @@ data(rock) ## 1888 年瑞士生育率分析 {#sec-swiss} -1888 年瑞士生育率和社会经济指标数据,各个指标都是百分比的形式,探索性分析 +1888 年,瑞士开始进入一个人口转变的阶段,从发展中国家的高出生率开始下滑。数据集 swiss 记录了 1888 年瑞士 47 个说法语的省份的生育率和社会经济指标数据,下面是数据集的部分 + +```{r view-swiss-data, out.lines=6, echo=FALSE} +swiss +``` + +Fertility(生育率,采用常见的标准生育率统计口径)、Agriculture(男性从事农业生产的比例)、Examination(应征者在军队考试中获得最高等级的比例)、Education(应征者有小学以上教育水平的比例)、Catholic(信仰天主教的比例)、Infant.Mortality(婴儿死亡率,仅考虑出生一年内死亡),各个指标都统一标准化为百分比的形式。其中,Examination 和 Education 是 1887 年、1888 年和 1889 年的平均值。瑞士 182 个地区 1888 年及其它年份的数据可从[网站](https://opr.princeton.edu/archive/pefp/switz.aspx)获得。 ```{r parcoord-swiss,eval=FALSE,echo=FALSE} # GGally 依赖 reshape 和 plyr,reshape 和 plyr 不再开发新特性了,但是 Hadley 仍然在为其续命,同时也意味着稳定,只要还能保证在新的 R 软件中安装运行。如果有更稳定更好的 R 包替换 GGally,我会欣然采纳 diff --git a/machine-learning.Rmd b/machine-learning.Rmd index 9c688bc25..41f5f9f5e 100644 --- a/machine-learning.Rmd +++ b/machine-learning.Rmd @@ -1,4 +1,6 @@ -# 机器学习 {#chap-machine-learning} +# (PART) 机器学习 {-} + +# 介绍 {#chap-machine-learning .unnumbered} 机器学习与统计的关系 diff --git a/natural-language-processing.Rmd b/natural-language-processing.Rmd index 482ef62b9..d6f46c1c5 100644 --- a/natural-language-processing.Rmd +++ b/natural-language-processing.Rmd @@ -4,6 +4,8 @@ [Dmitriy Selivanov](https://dsnotes.com/) 基于 C++ 开发的 [text2vec](https://github.com/dselivanov/text2vec) 在词嵌入、主题建模中具有极高的计算效率。 [doc2vec](https://github.com/bnosac/doc2vec) 分布式文档表示和 [word2vec](https://github.com/bnosac/word2vec) 分布式词表示, [fastTextR](https://github.com/FlorianSchwendinger/fastTextR) 将 [fastText](https://github.com/facebookresearch/fastText) 库引入 R 生态。 +[udpipe](https://github.com/bnosac/udpipe) + [textmineR](https://github.com/TommyJones/textmineR) 可用于文本挖掘和主题模型, [topicmodels](https://cran.r-project.org/web/packages/topicmodels/index.html) 包 [@topicmodels_2011_JSS] 基于C/C++实现 Latent Dirichlet Allocation LDA 模型和 Correlated Topics Models (CTM) [lda](https://cran.r-project.org/web/packages/lda/) 也是基于 C 语言实现 fast collapsed Gibbs sampler 用于 LDA,更多关于文本挖掘的主题包含在自然语言处理的视图内[^nlp-task-view]。 diff --git a/neural-networks.Rmd b/neural-networks.Rmd index 88d327411..60c3ddee2 100644 --- a/neural-networks.Rmd +++ b/neural-networks.Rmd @@ -16,6 +16,90 @@ reticulate::use_virtualenv(virtualenv = Sys.getenv("RETICULATE_PYTHON_ENV"), req Norm Matloff 等开发的 [polyreg](https://github.com/matloff/polyreg) 包以多元多项式回归替代神经网络,Brian Ripley 开发的 nnet 包以单层前馈神经网络用于多项对数线性模型。 +以 MacOS 系统为例,首先需要配置 Python 开发环境,这里采用 virtualenv 创建一个虚拟环境,然后安装本章需要的两个 Python 模块:tensorflow 和 mxnet。 + +```bash +brew install virtualenv +RETICULATE_PYTHON_ENV=/opt/.virtualenvs/r-tensorflow +virtualenv -p /usr/bin/python3 $RETICULATE_PYTHON_ENV +pip3 install tensorflow mxnet +``` + + +## tensorflow {#sec-tensorflow} + +```{r} +library(tensorflow) +library(keras) +``` + +```{r} +mnist <- dataset_mnist() +``` + + +```{r} +x_train <- mnist$train$x +y_train <- mnist$train$y +x_test <- mnist$test$x +y_test <- mnist$test$y +``` + + +```{r} +# reshape +x_train <- array_reshape(x_train, c(nrow(x_train), 784)) +x_test <- array_reshape(x_test, c(nrow(x_test), 784)) +# rescale +x_train <- x_train / 255 +x_test <- x_test / 255 +``` + + +```{r} +y_train <- to_categorical(y_train, 10) +y_test <- to_categorical(y_test, 10) +``` + + +```{r} +model <- keras_model_sequential() +model %>% + layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>% + layer_dropout(rate = 0.4) %>% + layer_dense(units = 128, activation = 'relu') %>% + layer_dropout(rate = 0.3) %>% + layer_dense(units = 10, activation = 'softmax') +``` + + +```{r} +summary(model) +``` + + +```{r} +model %>% compile( + loss = 'categorical_crossentropy', + optimizer = optimizer_rmsprop(), + metrics = c('accuracy') +) +``` + + +```{r} +history <- model %>% fit( + x_train, y_train, + epochs = 30, batch_size = 128, + validation_split = 0.2 +) +``` + + +```{r} +plot(history) +``` + ## mxnet {#sec-mxnet} ::: {.rmdinfo data-latex="{信息}"} diff --git a/numerical-optimization.Rmd b/numerical-optimization.Rmd index a9f892c41..3954be922 100644 --- a/numerical-optimization.Rmd +++ b/numerical-optimization.Rmd @@ -546,6 +546,13 @@ wireframe( ) ``` + +```{r vector-field, eval=FALSE, fig.cap="香蕉函数的梯度向量场", fig.width=5.5, fig.height=5} +r <- raster::rasterFromXYZ(df, crs = CRS("+proj=longlat +datum=WGS84")) +rasterVis::vectorplot(r, par.settings = RdBuTheme()) +``` + + ```{r} # 梯度函数 gr <- function(x) { @@ -596,6 +603,33 @@ wireframe( screen = list(z = 120, x = -70, y = 0) ) ``` +```{r plot3D-Ackley, echo=FALSE, eval=FALSE, fig.cap="二维 Ackley 函数图像", fig.width=5.5, fig.height=5} +# plot3D 也可以绘制不错的三维图形 +# 图形的视角和绘图代码需要简化 +library(plot3D) +a <- 20 +b <- 0.2 +c <- 2 * pi +M <- mesh( + seq(-10, 10, length.out = 201), + seq(-10, 10, length.out = 201) +) +u <- M$x +v <- M$y +x <- u +y <- v +z <- -a * exp(-b * sqrt(1 / 2 * (u^2 + v^2))) - + exp(1 / 2 * (cos(c * u) + cos(c * v))) + a + exp(1) + +surf3D(x, y, z, + colvar = z, colkey = F, phi = 30, ltheta = 30, + box = TRUE, ticktype = "detailed", bty = "b2" +) +``` + + 以 10 维的 Ackley 函数为例,先试一下普通的局部优化算法 --- Nelder–Mead 算法,选择初值 $(2,2,\cdots,2)$ ,看下效果,再与全局优化算法比较。 @@ -1649,6 +1683,7 @@ HorizontalGrid(grid.lines = 2, grid.col = "blue", grid.lty = 1) 简单线性回归 +[nlsr](https://cran.r-project.org/package=nlsr) 是否能给大家提供一些思路? diff --git a/office-documents.Rmd b/office-documents.Rmd new file mode 100644 index 000000000..cc0213d73 --- /dev/null +++ b/office-documents.Rmd @@ -0,0 +1,19 @@ +# 办公文档 {#chap-office-document} + +[docxtools](https://github.com/graphdr/docxtools)、[officer](https://github.com/davidgohel/officer) 和 [officedown](https://github.com/davidgohel/officedown) 大大扩展了 rmarkdown 在制作 Word/PPT 方面的功能。 + +本节探索 Markdown + Pandoc 以 Word 格式作为最终交付的可能性。R Markdown 借助 Pandoc 将 Markdown 转化为 Word 文档,继承自 Pandoc 的扩展性, R Markdown 也支持自定义 Word 模版,那如何自定义呢?首先,我们需要知道 Pandoc 内建的 Word 模版长什么样子,然后我们依样画葫芦,制作适合实际需要的模版。获取 Pandoc 2.10.1 自带的 Word 和 PPT 模版,只需在命令行中执行 + +```{bash,eval=FALSE} +# DOCX 模版 +pandoc -o custom-reference.docx --print-default-data-file reference.docx +# PPTX 模版 +pandoc -o custom-reference.pptx --print-default-data-file reference.pptx +``` + +这里其实是将 Pandoc 自带的 docx 文档 reference.docx 拷贝一份到 custom-reference.docx,而后将 custom-reference.docx 文档自定义一番,但仅限于借助 MS Word 去自定义样式。 Word 文档的 YAML 元数据定义详情见 ,如何深度自定义文档模版见 +,其它模版见 GitHub 仓库 [pandoc-templates](https://github.com/jgm/pandoc-templates)。这里提供一个[Word 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/docx-document.Rmd)供读者参考。**bookdown** 提供的函数 `word_document2()` 相比于 **rmarkdown** 提供的 `word_document()` 支持图表的交叉引用,更多细节详见帮助 `?bookdown::word_document2`。 + +::: {.rmdnote data-latex="{注意}"} +R Markdown 文档支持带编号的 Word 文档格式输出要求 Pandoc 版本 2.10.1 及以上, rmarkdown 版本 2.4 及以上。 +::: diff --git a/portable-documents.Rmd b/portable-documents.Rmd new file mode 100644 index 000000000..cc06ba3cb --- /dev/null +++ b/portable-documents.Rmd @@ -0,0 +1,167 @@ +# 便携式文档 {#chap-portable-document} + +## 文档汉化 {#sec-chinese-document} + +从 R Markdown 到 beamer 幻灯片,如何迁移 LaTeX 模版 + +默认的 PDF 文档 [PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/pdf-default.Rmd) + +详见[PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/pdf-document.Rmd) + +## 添加水印 {#sec-draft-watermark} + +[draftwatermark](https://github.com/callegar/LaTeX-draftwatermark) + +## 双栏排版 {#sec-two-column} + +普通单栏排版改为双栏排版,只需添加文档类选项 `"twocolumn"`,将 YAML 元数据中的 + +```yaml +classoption: "UTF8,a4paper,fontset=adobe,zihao=false" +``` + +变为 + +```yaml +classoption: "UTF8,a4paper,fontset=adobe,zihao=false,twocolumn" +``` + +其中,参数 `UTF8` 设定文档编码类型, `a4paper` 设置版面为 A4 纸大小,`fontset=adobe` 指定中文字体为 Adobe 字体,`zihao=false` 不指定字体大小,使用文档类 ctexart 默认的字号, + +## 参数化报告 {#sec-parameterized-reports} + +[参数化文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/parameterized-document.Rmd) + +进一步将文档类型做成参数化,实现在运行时自由选择,只需将如下两行替换掉上述一行 + +```yaml +params: + classoption: twocolumn +classoption: "`r knitr::inline_expr('params$classoption')`" +``` + +如果想要双栏的排版风格,编译时传递 documentclass 参数值,覆盖掉默认的参数值即可 + +```{r,eval=FALSE} +rmarkdown::render( + input = "examples/pdf-document.Rmd", + params = list(classoption = c("twocolumn")) +) +``` + +## 学术幻灯片 {#sec-beamer-slides} + +beamer 幻灯片也是一种 PDF 文档 [PDF 文档案例](https://github.com/XiangyunHuang/masr/blob/master/examples/beamer-verona.Rmd) + +Dirk Eddelbuettel 将几个大学的 beamer 幻灯片转化成 R Markdown 模板,收录在 [binb](https://github.com/eddelbuettel/binb) 包里,方便调用。伊利诺伊大学的 [James J Balamuta](https://thecoatlessprofessor.com/) 在 R Markdown 基础上专门为自己学校开发了一套的幻灯片模版,全部打包在 [uiucthemes](https://github.com/illinois-r/uiucthemes) 包里。 + +[komaletter](https://github.com/rnuske/komaletter) 用 Markdown 写信件 + +[memor](https://github.com/hebrewseniorlife/memor) `memor::pdf_memo()` + +[hrbrthemes](http://github.com/hrbrmstr/hrbrthemes) 提供两个文档模版 `hrbrthemes::ipsum_pdf()` 和 `hrbrthemes::ipsum()` + +此汉风主题由 [林莲枝](https://github.com/liantze/pgfornament-han/) 开发,LaTeX 宏包已发布在 [CTAN](https://www.ctan.org/pkg/pgfornament-han) 上,使用此幻灯片主题需要将相关的 LaTeX 宏包一块安装。 + +```bash +tlmgr install pgfornament pgfornament-han needspace xpatch +``` + +## 文档模版 {#sec-document-template} + +字体设置 + +:::::: {.columns} +::: {.column width="47.5%" data-latex="{0.475\textwidth}"} + +```yaml +--- +output: + pdf_document: + extra_dependencies: + DejaVuSansMono: + - scaled=0.9 + DejaVuSerif: + - scaled=0.9 + DejaVuSans: + - scaled=0.9 +--- +``` + +::: +::: {.column width="5%" data-latex="{0.05\textwidth}"} +\ + +::: +::: {.column width="47.5%" data-latex="{0.475\textwidth}"} + +```yaml +--- +output: + pdf_document: + extra_dependencies: + sourcecodepro: + - scale=0.85 + sourceserifpro: + - rmdefault + sourcesanspro: + - sfdefault +--- +``` + +::: +:::::: + +## 引用文献 {#sec-cite-doi} + +[Getting started with Zotero, Better BibTeX, and RMarkdown](https://fishandwhistle.net/post/2020/getting-started-zotero-better-bibtex-rmarkdown/) + +[^doi]: + +[knitcitations](https://github.com/cboettig/knitcitations) 包可以根据文献数字对象标识符(英文 Digital Object Identifier,简称 DOI)生成引用,以文章《A Probabilistic Grammar of Graphics》[@Pu_2020_Grammar] 为例,其 DOI 为 `10.1145/3313831.3376466`,总之, DOI 就像是文章的身份证,是一一对应的关系[^doi]。 + +```{r,eval=FALSE} +library(knitcitations) +citep(x ='10.1145/3313831.3376466') +``` + +``` +[1] "(Pu and Kay, 2020)" +``` + +在表格的格子中引用参考文献 + +```{r, results='asis'} +data.frame( + author = c("Yihui Xie", "Yihui Xie", "Yihui Xie"), + citation = c("[@xie2019]", "[@xie2015]", "[@xie2016]") +) |> + knitr::kable(format = "pandoc") +``` + +[citr](https://github.com/crsh/citr) 包提供了快速查找参考文献的 RStudio 插件,不用去原始文献库 `*.bib` 搜索查找,也会自动生成引用,非常方便,极大地提高了工作效率。 **citr** 还支持集成 [Zotero](https://www.zotero.org/) 文献管理软件,可以直接从 Zotero 中导入参考文献数据库。[rbbt](https://github.com/paleolimbot/rbbt) 包也提供了类似的功能,只要系统安装 Zotero 软件及其插件 [Better Bibtex for Zotero connector](https://retorque.re/zotero-better-bibtex/)。 + +## 自定义块 {#sec-custom-blocks} + +```r +tinytex::tlmgr_install(c('awesomebox', 'fontawesome5')) +``` + +安装 [awesomebox](https://ctan.org/pkg/awesomebox) 包,开发仓库在 ,这个 LaTeX 宏包的作用是提供几类常用的块,比如提示、注意、警告等 + +::: {.noteblock data-latex="注意"} +这是注意 +::: + +::: {.tipblock data-latex="提示"} +这是提示信息 +::: + +::: {.warningblock data-latex="警告"} +这是警告信息 +::: + +::: {.importantblock data-latex="重要"} +这是重要信息 +::: diff --git a/preamble.tex b/preamble.tex index 5f0e56302..5e013120b 100644 --- a/preamble.tex +++ b/preamble.tex @@ -2,12 +2,12 @@ \usepackage[fontset=adobe, heading=true, UTF8]{ctex} \usepackage[lotdepth=2, lofdepth=2]{subfig} -\usepackage[scale=0.85]{sourcecodepro} -\usepackage[rmdefault]{sourceserifpro} -\usepackage[sfdefault]{sourcesanspro} -\usepackage{float} -\usepackage{animate} -\usepackage{awesomebox} + +\usepackage[rmdefault,semibold]{sourceserifpro} +\usepackage[sfdefault,semibold]{sourcesanspro} +\usepackage[scale=0.85,semibold]{sourcecodepro} + +\usepackage{float,animate,awesomebox} \usepackage[skins]{tcolorbox} diff --git a/preface.Rmd b/preface.Rmd index 2d49dfbba..468eba44c 100644 --- a/preface.Rmd +++ b/preface.Rmd @@ -207,9 +207,9 @@ xfun::session_info(packages = c( 借助 **bookdown** [@xie2016] 可以将 Rmd 文件组织起来, **rmarkdown** [@rmarkdown]和 **knitr** [@xie2015] 将源文件编译成 Markdown 文件, [Pandoc](https://pandoc.org/) 将 Markdown 文件转化成 HTML 和 TeX 文件, [TinyTeX](https://yihui.name/tinytex/) [@xie2019] 可以将 TeX 文件进一步编译成 PDF 文档,书中大量的图形在用 **ggplot2** 包制作 [@Wickham_2016_ggplot2],而统计理论相关的示意图用 Base R 创作。 - -最后,本书在三个位置提供网页版, 网站 [Github Pages](https://pages.github.com/) 发布最近一次在 Travis 构建成功的版本 ,网站 [Bookdown](https://bookdown.org) 发布本地手动创建的版本 ,网站 [Netlify](https://netlify.com/) 发布最新的开发版 。 - +::: {.rmdtip data-latex="{提示}"} +得益于 Github Action 提供的测试服务,[Github Pages](https://pages.github.com/)、[Bookdown](https://bookdown.org) 和 [Netlify](https://netlify.com/) 提供的部署服务,鉴于国内的网络环境,本书托管在三个地方,分别是 。 +::: ## 记号约定 {#sec-conventions} diff --git a/regular-expressions.Rmd b/regular-expressions.Rmd index d28a54638..af1eb23ee 100644 --- a/regular-expressions.Rmd +++ b/regular-expressions.Rmd @@ -16,6 +16,11 @@ references :-) 维基百科关于 [正则表达式的描述](https://www.wikiwand.com/en/Regular_expression), [学习正则表达式](https://github.com/ziishaned/learn-regex/blob/master/translations/README-cn.md) +```{r} +# 毒鸡汤用来做文本分析 +# https://github.com/egotong/nows/blob/master/soul.sql +``` + R 内置的三种匹配模式 1. `fixed = TRUE`: 字面意思匹配 exact matching. diff --git a/renv.lock b/renv.lock index 37c48ad31..f50cc5b08 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.1.0", + "Version": "4.1.1", "Repositories": [ { "Name": "CRAN", @@ -93,10 +93,10 @@ }, "DT": { "Package": "DT", - "Version": "0.18", + "Version": "0.19", "Source": "Repository", "Repository": "CRAN", - "Hash": "a7d6660c869d4f41f856504828af4645" + "Hash": "6df7d86466f183ab0edcd8e6050b38e1" }, "Deriv": { "Package": "Deriv", @@ -147,13 +147,6 @@ "Repository": "CRAN", "Hash": "6d0e5aada1900c1bc778c00573aefd53" }, - "INLA": { - "Package": "INLA", - "Version": "21.02.23", - "Source": "Repository", - "Repository": "INLA", - "Hash": "d32b4fcbae4bed7ab9868d7221be451f" - }, "IRanges": { "Package": "IRanges", "Version": "2.26.0", @@ -218,10 +211,10 @@ }, "R6": { "Package": "R6", - "Version": "2.5.0", + "Version": "2.5.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "b203113193e70978a696b2809525649d" + "Hash": "470851b6d5d0ac559e9d01bb352b4021" }, "RColorBrewer": { "Package": "RColorBrewer", @@ -267,10 +260,10 @@ }, "RSQLite": { "Package": "RSQLite", - "Version": "2.2.7", + "Version": "2.2.8", "Source": "Repository", "Repository": "CRAN", - "Hash": "71f19d00a7736b24492fb26b483bc450" + "Hash": "1eab1dd4df2b76758fd8acab09a0e9b6" }, "RandomFields": { "Package": "RandomFields", @@ -412,10 +405,10 @@ }, "XML": { "Package": "XML", - "Version": "3.99-0.6", + "Version": "3.99-0.7", "Source": "Repository", "Repository": "CRAN", - "Hash": "59b7d0a3d18303ae30ba1246e77faa83" + "Hash": "09776593e3e9d555fbd10fe98781103a" }, "abind": { "Package": "abind", @@ -573,10 +566,10 @@ }, "bookdown": { "Package": "bookdown", - "Version": "0.23", + "Version": "0.24", "Source": "Repository", "Repository": "CRAN", - "Hash": "bc07f4b5b93bbf21075429e0c7e090f8" + "Hash": "3837766a1e1b527af25fa3e2d12a2800" }, "boot": { "Package": "boot", @@ -608,10 +601,10 @@ }, "brms": { "Package": "brms", - "Version": "2.15.0", + "Version": "2.16.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "fc74baf33468122b49024ba32e7c1a6a" + "Hash": "bc670b45ba54e4765b757d1d1ac33ee9" }, "broom": { "Package": "broom", @@ -629,17 +622,17 @@ }, "bslib": { "Package": "bslib", - "Version": "0.2.5.1", + "Version": "0.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "2f069f3f42847231aef7baa49bed97b0" + "Hash": "074ebc936dbcecd7115ed8083643b550" }, "cachem": { "Package": "cachem", - "Version": "1.0.5", + "Version": "1.0.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "5346f76a33eb7417812c270b04a5581b" + "Hash": "648c5b3d71e6a37e3043617489a0a0e9" }, "callr": { "Package": "callr", @@ -838,6 +831,13 @@ "Repository": "CRAN", "Hash": "2b06f9e415a62b6762e4b8098d2aecbc" }, + "cubelyr": { + "Package": "cubelyr", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "da6ba095ae2dc4ecab6e9156633c5901" + }, "curl": { "Package": "curl", "Version": "4.3.2", @@ -1048,10 +1048,15 @@ }, "flexdashboard": { "Package": "flexdashboard", - "Version": "0.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a0534e167498732ac0137e7ba46364dc" + "Version": "0.5.2.9000", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "flexdashboard", + "RemoteUsername": "rstudio", + "RemoteRef": "HEAD", + "RemoteSha": "96fa830ca868fe9463bbff0145b518310be48c63", + "Hash": "3b2fa4fbd7c580f8975daabe48c36d84" }, "fontcm": { "Package": "fontcm", @@ -1102,13 +1107,6 @@ "Repository": "CRAN", "Hash": "870d6d5d39b23923d0c816f904075b4c" }, - "fresh": { - "Package": "fresh", - "Version": "0.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "fa54367040deb4537da49b7ac0ee5770" - }, "fs": { "Package": "fs", "Version": "1.5.0", @@ -1118,10 +1116,10 @@ }, "future": { "Package": "future", - "Version": "1.21.0", + "Version": "1.22.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "f25fad6bee82b7ab01f055e2d813b96f" + "Hash": "9c56382c3e53f0b4fc0fc16d88fc3974" }, "gamm4": { "Package": "gamm4", @@ -1174,10 +1172,10 @@ }, "gert": { "Package": "gert", - "Version": "1.3.1", + "Version": "1.3.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "8612346a70ca2b2bebb0ef008d5c0e30" + "Hash": "72b34cde959b806e86cb99fc12d1ba58" }, "ggalluvial": { "Package": "ggalluvial", @@ -1202,15 +1200,10 @@ }, "ggbump": { "Package": "ggbump", - "Version": "0.1.99999", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "ggbump", - "RemoteUsername": "davidsjoberg", - "RemoteRef": "HEAD", - "RemoteSha": "b34c70fbb51fde46e1cb1587136c5699d2f49a4c", - "Hash": "afa2715ba60b85daa0321adc5a1adf3a" + "Version": "0.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "695abfc5a4bf0049b128e2d9f08ac7ef" }, "ggdendro": { "Package": "ggdendro", @@ -1233,6 +1226,13 @@ "Repository": "CRAN", "Hash": "bec9e66131b3d954917a59827ab8c2ee" }, + "ggiraph": { + "Package": "ggiraph", + "Version": "0.7.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "38b169da6947549169248a1afe549bb1" + }, "ggmosaic": { "Package": "ggmosaic", "Version": "0.3.3", @@ -1296,6 +1296,13 @@ "Repository": "CRAN", "Hash": "4512013654d28dbf39dc4a8f5b9bdde2" }, + "ggthemes": { + "Package": "ggthemes", + "Version": "4.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "617fb2c300f68e8b1ba21043fba88fd7" + }, "gh": { "Package": "gh", "Version": "1.3.0", @@ -1326,10 +1333,10 @@ }, "glmmTMB": { "Package": "glmmTMB", - "Version": "1.1.2", + "Version": "1.1.2.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "3e91f1654087101e9fc03382ab7d172d" + "Hash": "6a165bba675490c818242eddccef26ee" }, "glmnet": { "Package": "glmnet", @@ -1465,10 +1472,10 @@ }, "htmltools": { "Package": "htmltools", - "Version": "0.5.1.1", + "Version": "0.5.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "af2c2531e55df5cf230c4b5444fc973c" + "Hash": "526c484233f42522278ab06fb185cb26" }, "htmlwidgets": { "Package": "htmlwidgets", @@ -1479,10 +1486,10 @@ }, "httpuv": { "Package": "httpuv", - "Version": "1.6.1", + "Version": "1.6.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "54344a78aae37bc6ef39b1240969df8e" + "Hash": "a42959b003aad9fc351b10250c5a6b77" }, "httr": { "Package": "httr", @@ -1498,6 +1505,13 @@ "Repository": "CRAN", "Hash": "3987784c19192ad0f2261c456d936df1" }, + "idbr": { + "Package": "idbr", + "Version": "1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "de8c9b129fc7178237cf3c392c93054b" + }, "ids": { "Package": "ids", "Version": "1.0.1", @@ -1589,6 +1603,13 @@ "Repository": "CRAN", "Hash": "49b625e6aabe4c5f091f5850aba8ff78" }, + "keras": { + "Package": "keras", + "Version": "2.6.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "47049edbfeec9cc84b1629a16a7f9e04" + }, "kernlab": { "Package": "kernlab", "Version": "0.9-29", @@ -1612,10 +1633,10 @@ }, "later": { "Package": "later", - "Version": "1.2.0", + "Version": "1.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "b61890ae77fea19fc8acadd25db70aa4" + "Hash": "7e7b457d7766bc47f2a5f21cc2984f8e" }, "lattice": { "Package": "lattice", @@ -1645,13 +1666,6 @@ "Repository": "CRAN", "Hash": "e3d73becdeb92754d27172d278cbf61d" }, - "leaflet.extras": { - "Package": "leaflet.extras", - "Version": "1.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8dbfc2c4d7ca2660971caf1153ca95c2" - }, "leaflet.providers": { "Package": "leaflet.providers", "Version": "1.9.0", @@ -1722,12 +1736,19 @@ "Repository": "CRAN", "Hash": "1ebfdc8a3cfe8fe19184f5481972b092" }, + "lwgeom": { + "Package": "lwgeom", + "Version": "0.2-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c7d964d354dd2261b2863bbc5c4d3915" + }, "magick": { "Package": "magick", - "Version": "2.7.2", + "Version": "2.7.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "db36bbb91bf293f0550c51ecbf6f1928" + "Hash": "56fbad418aa50939ed8c3028126af8d7" }, "magrittr": { "Package": "magrittr", @@ -1773,10 +1794,10 @@ }, "matrixStats": { "Package": "matrixStats", - "Version": "0.60.0", + "Version": "0.60.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "915f59111573046dc425410b435ab5ed" + "Hash": "c3cd22becabe4f29ee047221cc2b89ae" }, "maxLik": { "Package": "maxLik", @@ -1920,10 +1941,10 @@ }, "openssl": { "Package": "openssl", - "Version": "1.4.4", + "Version": "1.4.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "f4dbc5a47fd93d3415249884d31d6791" + "Hash": "5406fd37ef0bf9b88c8a4f264d6ec220" }, "openxlsx": { "Package": "openxlsx", @@ -1954,10 +1975,10 @@ }, "packrat": { "Package": "packrat", - "Version": "0.6.0", + "Version": "0.7.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "0d6cc4c357e7602bb3eee299f4cfc2a5" + "Hash": "95e8c3825efcaad09411799da92c0af9" }, "palmerpenguins": { "Package": "palmerpenguins", @@ -2071,13 +2092,6 @@ "Repository": "CRAN", "Hash": "6fa90d23863d8e1d30b7e9b1427257c0" }, - "pracma": { - "Package": "pracma", - "Version": "2.3.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cd80df226af8555a5cff3e7a76bd0756" - }, "praise": { "Package": "praise", "Version": "1.0.0", @@ -2218,13 +2232,6 @@ "Repository": "CRAN", "Hash": "d9e9c20a829a25d2bdc6706dbc437b7d" }, - "randomForest": { - "Package": "randomForest", - "Version": "4.6-14", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6367ba8128568cc5ebf8082e440948e4" - }, "rappdirs": { "Package": "rappdirs", "Version": "0.3.3", @@ -2358,6 +2365,13 @@ "Repository": "CRAN", "Hash": "94a44c3f0d49ceb80c385610696873ca" }, + "rgl": { + "Package": "rgl", + "Version": "0.107.14", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e7dc7599825dea5d5809478586948141" + }, "rio": { "Package": "rio", "Version": "0.5.27", @@ -2381,10 +2395,10 @@ }, "rlist": { "Package": "rlist", - "Version": "0.4.6.1", + "Version": "0.4.6.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "75886cbfe663d94199639f442d43836a" + "Hash": "290c8ea0700d2e7258082d0025386e68" }, "rmarkdown": { "Package": "rmarkdown", @@ -2393,6 +2407,13 @@ "Repository": "CRAN", "Hash": "1fb097f233ee98968f8e5d0bcd42df6d" }, + "rnaturalearthdata": { + "Package": "rnaturalearthdata", + "Version": "0.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0d34b89b43e900467e60f5449226f3e3" + }, "rngtools": { "Package": "rngtools", "Version": "1.5", @@ -2526,13 +2547,6 @@ "Repository": "CRAN", "Hash": "d81cd7976e625f3c0d3555df809632a9" }, - "servr": { - "Package": "servr", - "Version": "0.23", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "75a40cd4f8503fe175be6b213009767f" - }, "sessioninfo": { "Package": "sessioninfo", "Version": "1.1.1", @@ -2575,27 +2589,6 @@ "Repository": "CRAN", "Hash": "6e3b6ae7fe02b5859e4bb277f218b8ae" }, - "shinyWidgets": { - "Package": "shinyWidgets", - "Version": "0.6.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7dd5f3ee96c64a47fdbc6e437ff1c7e1" - }, - "shinydashboard": { - "Package": "shinydashboard", - "Version": "0.7.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "133639dc106955eee4ffb8ec73edac37" - }, - "shinydashboardPlus": { - "Package": "shinydashboardPlus", - "Version": "2.0.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "038854685b183330e2fd303c58f6e793" - }, "shinyjs": { "Package": "shinyjs", "Version": "2.0.0", @@ -2619,10 +2612,10 @@ }, "showtext": { "Package": "showtext", - "Version": "0.9-3", + "Version": "0.9-4", "Source": "Repository", "Repository": "CRAN", - "Hash": "e9c17b6d8e5c352a46103e8b51d6a5f1" + "Hash": "57151c081eee689dc669d1bb9a77bbb7" }, "showtextdb": { "Package": "showtextdb", @@ -2696,10 +2689,10 @@ }, "splines2": { "Package": "splines2", - "Version": "0.4.3", + "Version": "0.4.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "68169b066064baace14e686bfd688eb3" + "Hash": "029f2d861b671d109cad3b9d0fd9f2ac" }, "stackr": { "Package": "stackr", @@ -2713,12 +2706,19 @@ "RemoteSha": "7db4a6f3b22d7f5a4f8b187a8b66bf853808b0e4", "Hash": "036914312419521004a44d67afc6e8cd" }, + "stars": { + "Package": "stars", + "Version": "0.5-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "15a997ae176b92af20b3516ff69f07be" + }, "stringi": { "Package": "stringi", - "Version": "1.7.3", + "Version": "1.7.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "7943cfae120c77a255025e5f63856532" + "Hash": "ebaccb577da50829a3bb1b8296f318a5" }, "stringr": { "Package": "stringr", @@ -2729,10 +2729,10 @@ }, "survival": { "Package": "survival", - "Version": "3.2-12", + "Version": "3.2-13", "Source": "Repository", "Repository": "CRAN", - "Hash": "dcdc241a75d17656fec8d02cd8215c39" + "Hash": "6f0a0fadc63bc6570fe172770f15bbc4" }, "svglite": { "Package": "svglite", @@ -2778,17 +2778,17 @@ }, "tensorflow": { "Package": "tensorflow", - "Version": "2.5.0", + "Version": "2.6.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "b51b9d9ab21c2cc66aeff458aba89a00" + "Hash": "d8cf6d71dfb687037c512ed535478593" }, "terra": { "Package": "terra", - "Version": "1.3-4", + "Version": "1.3-22", "Source": "Repository", "Repository": "CRAN", - "Hash": "4f08b1ac13ab44445efe56c1eaca59c5" + "Hash": "1b027d43e214984b0d19d92fe01fcc02" }, "testthat": { "Package": "testthat", @@ -2797,6 +2797,13 @@ "Repository": "CRAN", "Hash": "575216c9946ca70016c3ffb9c31709ba" }, + "tfautograph": { + "Package": "tfautograph", + "Version": "0.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "69cdf05b40b241f9a2691eabd575c992" + }, "tfruns": { "Package": "tfruns", "Version": "1.5.0", @@ -2813,10 +2820,10 @@ }, "tibble": { "Package": "tibble", - "Version": "3.1.3", + "Version": "3.1.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "038455513fde65e79c25e724e0c84ca2" + "Hash": "5e8ad5621e5c94b24ec07b88eee13df8" }, "tidyr": { "Package": "tidyr", @@ -2897,10 +2904,10 @@ }, "treemap": { "Package": "treemap", - "Version": "2.4-2", + "Version": "2.4-3", "Source": "Repository", "Repository": "CRAN", - "Hash": "11040d92c1144b4d8ca6470b0662c871" + "Hash": "754d9ed7b25a790451f5da1a62fc7d04" }, "treemapify": { "Package": "treemapify", @@ -3014,19 +3021,12 @@ "Repository": "CRAN", "Hash": "1a23013f39e67bb57cbda6f4ddde5470" }, - "waiter": { - "Package": "waiter", - "Version": "0.2.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "30b15e2b6a0abc1e67bb468d9d851071" - }, "waldo": { "Package": "waldo", - "Version": "0.2.5", + "Version": "0.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "20c45f1d511a3f730b7b469f4d11e104" + "Hash": "312b264fae22fdba83b7a74187a24da8" }, "webshot": { "Package": "webshot", @@ -3056,20 +3056,6 @@ "Repository": "CRAN", "Hash": "60e191a866c5649a8f58a458f5e60edf" }, - "xaringan": { - "Package": "xaringan", - "Version": "0.22", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c0ed5b878f24730ed7f4afbb95af84ae" - }, - "xaringanthemer": { - "Package": "xaringanthemer", - "Version": "0.4.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b6c551c7ab2791ec57c3d7326b79ff2f" - }, "xfun": { "Package": "xfun", "Version": "0.25", @@ -3126,6 +3112,13 @@ "Repository": "CRAN", "Hash": "2826c5d9efb0a88f657c7a679c7106db" }, + "zeallot": { + "Package": "zeallot", + "Version": "0.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ee9b643aa8331c45d8d82eb3a137c9bc" + }, "zip": { "Package": "zip", "Version": "2.2.0", diff --git a/reproducible-workflows.Rmd b/reproducible-workflows.Rmd new file mode 100644 index 000000000..fce3bc677 --- /dev/null +++ b/reproducible-workflows.Rmd @@ -0,0 +1,5 @@ +# 工作流 {#chap-reproducible-workflows} + +[drake](https://github.com/ropensci/drake) 一站式可重复性研究工作空间打造者,用户手册 和学习材料 + + diff --git a/spatial-analysis.Rmd b/spatial-analysis.Rmd index 4fe780e9a..3db09689d 100644 --- a/spatial-analysis.Rmd +++ b/spatial-analysis.Rmd @@ -1,41 +1,30 @@ -# 空间分析 {#chap-spatial-analysis} +# 空间数据分析 {#chap-spatial-analysis} - - -[mapsf](https://github.com/riatelab/mapsf) 是 [cartography](https://github.com/riatelab/cartography/) 的继任者,它更加友好、轻量和稳健。 +Timothée Giraud 创建的 [riatelab](https://github.com/riatelab/) 组织开发系列 R 包工具,可以绘制各种类型和风格的地图,专题地图工具已经从 [cartography](https://github.com/riatelab/cartography/) 过渡到 [mapsf](https://github.com/riatelab/mapsf),它更加友好、轻量和稳健。类似的 R 包还有 [choroplethr](https://github.com/trulia/choroplethr),只是上次更新在 2015 年。 -[choroplethr](https://github.com/trulia/choroplethr) 简化创建 thematic maps 的过程。 +空间数据可视化常常离不开基础地图数据,不同的 R 包依赖的地图服务有所不同,比如 + [RgoogleMaps](https://github.com/markusloecher/rgooglemaps)、[ggmap](https://github.com/dkahle/ggmap) 和 [googleway](https://github.com/SymbolixAU/googleway) 主要依赖谷歌的地图数据。 +而 [mapdeck](https://github.com/SymbolixAU/mapdeck) 基于 [deck.gl](https://github.com/visgl/deck.gl) 和 [Mapbox](https://github.com/mapbox/mapbox-gl-js) 支持移动和网页应用,GPU 渲染等。[leaflet](https://github.com/rstudio/leaflet) 则基于开源的[Leaflet](https://github.com/Leaflet/Leaflet)库提供交互式空间数据可视化的能力。 -[ggmap](https://github.com/dkahle/ggmap) 依赖 [RgoogleMaps](https://github.com/markusloecher/rgooglemaps) 就不介绍了 -[mapdeck](https://github.com/SymbolixAU/mapdeck) 支持调用 GPU 渲染 -[deck.gl](https://github.com/visgl/deck.gl) MIT 协议 +[芝加哥大学空间数据科学中心](https://spatial.uchicago.edu/) 开发的 R 包 [rgeoda](https://github.com/GeoDaCenter/rgeoda) 基于开源的 C++ 库[GeoDa](https://github.com/GeoDaCenter/geoda),提供一系列空间数据分析能力,包括探索性空间数据分析、空间聚类检测和聚类分析。 -[googleway](https://github.com/SymbolixAU/googleway) +Edzer Pebesma 和 Roger Bivand 合著的 [Spatial Data Science with applications in R](https://www.r-spatial.org/book),Christopher K. Wikle, Andrew Zammit-Mangion 和 Noel Cressie 合著的 [Spatio-Temporal Statistics with R](https://spacetimewithr.org/)。推荐学习 Edzer Pebesma 在几届国际 R 语言大会上的材料,2021 年的[R Spatial](https://edzer.github.io/UseR2021/),2020 年的[Analyzing and visualising spatial and spatiotemporal data cubes Part I](https://edzer.github.io/UseR2020/), +2019 年的[Spatial workshop part I](https://edzer.github.io/UseR2019/part1.html) 和 [Spatial workshop part II](https://edzer.github.io/UseR2019/part2.html), +2017 年的[Spatial Data in R: New Directions](https://edzer.github.io/UseR2017/) +2016 年的[Handling and Analyzing Spatial, Spatiotemporal and Movement Data](https://edzer.github.io/UseR2016/)。 -Edzer Pebesma + -- UseR2020 [Analyzing and visualising spatial and spatiotemporal data cubes - Part I](https://edzer.github.io/UseR2020/) -- UseR2019 [UseR! 2019 Spatial workshop part I](https://edzer.github.io/UseR2019/part1.html) [UseR! 2019 Spatial workshop part II](https://edzer.github.io/UseR2019/part2.html) -- UseR2017 [Spatial Data in R: New Directions](https://edzer.github.io/UseR2017/) -- UseR2016 [Handling and Analyzing Spatial, Spatiotemporal and Movement Data](https://edzer.github.io/UseR2016/) ```{r} library(sp) @@ -53,6 +42,83 @@ library(sfarrow) # https://github.com/wcjochem/sfarrow # library(highcharter) # 要替换掉 ``` +```{r,eval=FALSE} +library(maps) +library(mapdata) +map("china", fill = F, col = terrain.colors(100)) + +library(leaflet) + +mapChina = map("china", fill = F, plot = FALSE) +leaflet(data = mapChina) |> + addTiles() |> + addPolygons(fillColor = topo.colors(10, alpha = NULL), stroke = FALSE) + + +# From https://leafletjs.com/examples/choropleth/us-states.js +# 返回 sp 对象 +states <- geojsonio::geojson_read("json/us-states.geojson", what = "sp") + +bins <- c(0, 10, 20, 50, 100, 200, 500, 1000, Inf) +pal <- colorBin("YlOrRd", domain = states$density, bins = bins) + +labels <- sprintf( + "%s
%g people / mi2", + states$name, states$density +) %>% lapply(htmltools::HTML) + +leaflet(states) %>% + setView(-96, 37.8, 4) %>% + addProviderTiles("MapBox", options = providerTileOptions( + id = "mapbox.light", + accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN'))) %>% + addPolygons( + fillColor = ~pal(density), + weight = 2, + opacity = 1, + color = "white", + dashArray = "3", + fillOpacity = 0.7, + highlight = highlightOptions( + weight = 5, + color = "#666", + dashArray = "", + fillOpacity = 0.7, + bringToFront = TRUE), + label = labels, + labelOptions = labelOptions( + style = list("font-weight" = "normal", padding = "3px 8px"), + textsize = "15px", + direction = "auto")) %>% + addLegend(pal = pal, values = ~density, opacity = 0.7, title = NULL, + position = "bottomright") +``` + + +```{r,eval=FALSE} +library(sp) +data(meuse) +coordinates(meuse) <- ~x+y +proj4string(meuse) <- CRS("+init=epsg:28992") +plot(meuse) +``` + +```{r,eval=FALSE} +library(sp) +demo(meuse, ask = FALSE, echo = FALSE) # loads the meuse data sets +library(maptools) +crs.longlat = CRS("+init=epsg:4326") +meuse.longlat = spTransform(meuse, crs.longlat) +plot(meuse.longlat, axes = TRUE) + +library(mapview) +library(rgdal) # for readOGR +nc <- readOGR(system.file("shapes/", package="maptools"), "sids") +proj4string(nc) <- CRS("+proj=longlat +datum=NAD27") +class(nc) +mapview(nc, zcol = c("SID74", "SID79"), alpha.regions = 1.0, legend = TRUE) +``` + ```{r,eval=FALSE} library(RgoogleMaps) @@ -64,7 +130,7 @@ library(mapsf) 冈比亚地形 -```{r gambia-altitude,fig.cap="冈比亚地形海拔数据",fig.width=8,fig.height=4,message=FALSE,warning=FALSE,cache=TRUE} +```{r gambia-altitude, fig.cap="冈比亚地形海拔数据", fig.width=8, fig.height=4, message=FALSE, warning=FALSE, cache=TRUE} sp_path <- "data/" # 存储临时地形文件 if (!dir.exists(sp_path)) dir.create(sp_path, recursive = TRUE) # Gambia 海拔数据 @@ -113,62 +179,6 @@ gambia_agg <- aggregate( $Y \sim b(1,p)$ 每个人检验结果,就是感染 1 或是没有感染 0,感染率 $p$ 的建模分析,个体水平 -```{r hc-gambia-pos,eval=knitr::is_html_output(),fig.cap="各个村庄疟疾流行度"} -library(highcharter) -hchart(gambia_agg, "bubble", hcaes(x = x, y = y, fill = pos, size = pos), - maxSize = "5%", name = "Gambia", showInLegend = FALSE -) %>% - hc_yAxis(title = list(text = "Latitude")) %>% - hc_xAxis(title = list(text = "Longitude"), labels = list(align = "center")) %>% - hc_colorAxis( - stops = color_stops(colors = hcl.colors(palette = "Plasma", n = 10)) - ) %>% - hc_tooltip( - pointFormat = "({point.x:.2f}, {point.y:.2f})
Size: {point.z:.2f}" - ) -``` - - -```{r hc-gambia,eval=knitr::is_html_output()} -# gm_data <- download_map_data("https://code.highcharts.com/mapdata/countries/gm/gm-all.js") -# get_data_from_map(gm_data) - -hcmap("countries/gm/gm-all.js") %>% - hc_title(text = "Gambia") -``` - -```{r hc-usa-arrest,eval=knitr::is_html_output()} -data("USArrests", package = "datasets") -data("usgeojson") # 加载地图数据 地图数据的结构 - -USArrests <- transform(USArrests, state = rownames(USArrests)) - -highchart() %>% - hc_title(text = "Violent Crime Rates by US State") %>% - hc_subtitle(text = "Source: USArrests data") %>% - hc_add_series_map(usgeojson, USArrests, - name = "Murder arrests (per 100,000)", - value = "Murder", joinBy = c("woename", "state"), - dataLabels = list( - enabled = TRUE, - format = "{point.properties.postalcode}" - ) - ) %>% - hc_colorAxis(stops = color_stops()) %>% - hc_legend(valueDecimals = 0, valueSuffix = "%") %>% - hc_mapNavigation(enabled = TRUE) -``` - -highcharter 包含三个数据集分别是: worldgeojson 世界地图(国家级)、 usgeojson 美国地图(州级)、 uscountygeojson 美国地图(城镇级)。其它地图数据见 。 - - -```{r hc-china-map,eval=knitr::is_html_output(),cache=TRUE} -# 添加地图数据 -hcmap(map = "countries/cn/custom/cn-all-sar-taiwan.js") %>% - hc_title(text = "中国地图") -``` - - ```{r,eval=FALSE} library(mapdeck) # 多边形 @@ -187,6 +197,42 @@ mapdeck( location = c(145, -37.8), zoom = 10) %>% +```{r,eval=FALSE} +# https://github.com/geodacenter/rgeoda/ +library(rgeoda) +library(sf) + +guerry_path <- system.file("extdata", "Guerry.shp", package = "rgeoda") +guerry <- st_read(guerry_path) + +crm_prp = guerry["Crm_prp"] +queen_w <- queen_weights(guerry) + +lisa <- local_moran(queen_w, crm_prp) + +lisa_colors <- lisa_colors(lisa) +lisa_labels <- lisa_labels(lisa) +lisa_clusters <- lisa_clusters(lisa) + +plot(st_geometry(guerry), + col = sapply(lisa_clusters, function(x) { + return(lisa_colors[[x + 1]]) + }), + border = "#333333", lwd = 0.2 +) +title(main = "Local Moran Map of Crm_prs") +legend("bottomleft", + legend = lisa_labels, + fill = lisa_colors, border = "#eeeeee" +) + + +library(spData) + +library(sf) +wheat <- st_read(system.file("shapes/wheat.shp", package="spData")) +plot(wheat) +``` ## 运行环境 {#sec-spatial-analysis-session} diff --git a/spatial-modeling.Rmd b/spatial-modeling.Rmd index 1494bb66d..b7e75f1ce 100644 --- a/spatial-modeling.Rmd +++ b/spatial-modeling.Rmd @@ -1,16 +1,19 @@ -# 空间建模 {#chap-spatial-modeling} +# 空间数据建模 {#chap-spatial-modeling} ```{r, eval=!require('INLA'), include=FALSE} if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install(c('graph', 'Rgraphviz')) -install.packages("INLA", repos = c("https://inla.r-inla-download.org/R/stable", getOption("repos")), dependencies = TRUE) +install.packages("INLA", + repos = c("https://inla.r-inla-download.org/R/stable", getOption("repos")), + dependencies = TRUE +) ``` ```{r} library(geoR) -library(INLA) -# library(leaflet) +# library(INLA) +library(leaflet) library(highcharter) ``` @@ -18,13 +21,13 @@ library(highcharter) loaloa 眼线虫病,人群感染,村庄水平, 响应变量服从二项分布 $Y \sim b(n,p)$,每个村庄感染的人数 $Y_i \sim b(n_i, p_i)$ 其中 $n_i$ 是第 $i$ 个村庄调查的人数, $p_i$ 是观测的感染率 -```{r} +```{r,eval=FALSE} data("loaloa", package = "PrevMap") ``` -```{r hc-map-cameroon,eval=knitr::is_html_output(),cache=TRUE} +```{r hc-map-cameroon, eval=knitr::is_html_output()} hcmap(map = "countries/cm/cm-all.js") %>% hc_title(text = "喀麦隆及其周边地区眼线虫病流行度") ``` diff --git a/spatial-viz.Rmd b/spatial-viz.Rmd new file mode 100644 index 000000000..bfdc1b11e --- /dev/null +++ b/spatial-viz.Rmd @@ -0,0 +1,321 @@ +# 空间数据可视化 {#chap-spatial-viz} + +```{r,include=FALSE} +source(file = "_common.R") +``` + +```{r} +library(sp) +library(rgdal) +library(maps) +library(mapproj) +library(maptools) +library(sf) +library(ggplot2) +library(raster) +library(abind) +library(cubelyr) +library(stars) +library(echarts4r) +library(ggthemes) +``` + +[王江浩](https://github.com/Jianghao) +[北京城市实验室](https://www.beijingcitylab.com/) + + +Robert J. Hijmans [^Robert-Hijmans] 开发了 [raster](https://github.com/rspatial/raster) 包用于网格空间数据的读、写、操作、分析和建模,同时维护了空间数据分析的网站 。Edzer Pebesma [^Edzer-Pebesma] 和 Roger Bivand 等创建了 [sp](https://github.com/edzer/sp/) 包定义了空间数据类型和方法,提供了大量的空间数据操作方法,同时维护了空间数据对象 sp 的绘图网站 ,他们也一起合作写了新书 [Spatial Data Science](https://keen-swartz-3146c4.netlify.com/),提供了在线 [网页版](https://www.r-spatial.org/book/) 书籍及其 [源代码](https://github.com/edzer/sdsr)。Edzer Pebesma 后来开发了 [sf](https://github.com/r-spatial/sf/) 包重新定义了空间数据对象和操作方法,并维护了空间数据分析、建模和可视化网站 + +[^Robert-Hijmans]: Department of Environmental Science and Policy at the University of California, Davis. [Ecology, Geography, and Agriculture](https://biogeo.ucdavis.edu/) +[^Edzer-Pebesma]: Institute for Geoinformatics of the University of Münster. + +课程案例学习 + +1. [2018-Introduction to Geospatial Raster and Vector Data with R](https://datacarpentry.org/r-raster-vector-geospatial/) 空间数据分析课程 +1. [Peter Ellis](http://freerangestats.info) 新西兰大选和普查数据 [More cartograms of New Zealand census data: district and city level](http://freerangestats.info/blog/nz.html) +1. [2017-Mapping oil production by country in R](http://sharpsightlabs.com/blog/map-oil-production-country-r/) 石油产量在全球的分布 +1. [2017-How to highlight countries on a map](https://www.sharpsightlabs.com/blog/highlight-countries-on-map/) 高亮地图上的国家 +1. [2017-Mapping With Sf: Part 3](https://ryanpeek.github.io/2017-11-21-mapping-with-sf-part-3/) +1. [Data Visualization Shiny Apps](https://ignaciomsarmiento.github.io/software.html) 数据可视化核密度估计 In this app I identify crime hotspots using a bivariate density estimation strategy +1. [Association of Statisticians of American Religious Bodies (ASARB) viridis USA map](http://www.rpubs.com/cgarey/ProjectOneFinal) +1. [出租车行车轨迹数据](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) +1. [Geospatial processing with Clickhouse-CARTO Blog](https://carto.com/blog/geospatial-processing-with-clickhouse/) + + +## 空间数据 {#sec-spatial-data} + +空间数据存储在数据库中,比如 [PostGIS](https://postgis.net/),它是对象关系数据库 [PostgreSQL](https://postgresql.org/) 在空间数据库方面的扩展。 + +### data.frame {#subsec-dataframe} + +```{r view-quakes-data, out.lines=6} +data("quakes") +quakes +``` + +### sp {#subsec-sp} + +[sp-gallery](https://edzer.github.io/sp/) + + +```{r} +library(sp) +``` + +空间数据对象,以类 sp 方式存储 [@Pebesma_2005_sp] + +```{r} +library(sp) +data("meuse") +coordinates(meuse) <- ~x+y +proj4string(meuse) <- CRS("+init=epsg:28992") +class(meuse) +proj4string(meuse) +``` + + +```{r,fig.cap="sp 对象",fig.asp=0.8,fig.width=5,out.width="58.33%"} +plot(meuse, axes = TRUE) +``` + +```{r,fig.cap="sp 对象",fig.asp=0.8,fig.width=5,out.width="58.33%"} +library(rgdal) +crs.longlat <- CRS("+init=epsg:4326") +meuse.longlat <- spTransform(meuse, crs.longlat) +plot(meuse.longlat, axes = TRUE) +``` + + +```{r} +library(maptools) +fname <- system.file("shapes/sids.shp", package = "maptools") +p4s <- CRS("+proj=longlat +datum=NAD27") +nc <- readShapePoly(fname, proj4string = p4s) +plot(nc, axes = TRUE, col = grey(1 - nc$SID79 / 57)) +``` + +::: {.rmdwarn data-latex="{警告}"} +maptools 提供的 `readShapePoly` 函数去读取 shp 文件的方式已经过时,推荐使用 `rgdal::readOGR` 或者 `sf::st_read` 方式读取 +::: + + +```{r} +# Trellis maps +arrow <- list("SpatialPolygonsRescale", + layout.north.arrow(2), + offset = c(-76, 34), scale = 0.5, which = 2 +) +spplot(nc, c("SID74", "SID79"), + as.table = TRUE, + scales = list(draw = T), sp.layout = arrow +) +``` + + +### sf {#subsec-sf} + +```{r} +library(sf) +library(ggplot2) +nc <- read_sf(system.file("gpkg/nc.gpkg", package = "sf")) +nc2 <- nc |> + dplyr::select(SID74, SID79) |> + tidyr::gather(VAR, SID, -geom) +ggplot() + + geom_sf(data = nc2, aes(fill = SID)) + + facet_wrap(~VAR, ncol = 1) +``` + + +### raster {#subsec-raster} + +raster 包定义了获取和操作空间 raster 类型数据集的类和方法,rasterVis 补充加强了 raster 包在数据可视化和交互方面的功能。可视化是基于 lattice 的 + +[rastervis-gh]: https://github.com/oscarperpinan/rastervis +[rastervis-web]: https://oscarperpinan.github.io/rastervis/ +[rastervis-faq]: https://oscarperpinan.github.io/rastervis/FAQ.html + +[raster](https://github.com/rspatial/raster) 包的开发已经被作者 [Robert J. Hijmans](https://desp.ucdavis.edu/people/robert-j-hijmans) 迁移到 Github 上啦,官方文档 + +星号 * 标记的是 S3 方法 + +```{r} +methods(plot) +``` + +查看函数的定义 + +```{r} +getAnywhere(plot.raster) +``` + +rasterImage 函数来绘制图像,如果想知道 `rasterImage` 的内容可以继续看 `getAnywhere(rasterImage)` + +```{r} +getAnywhere(rasterImage) +``` + +通过查看函数的帮助 `?rasterImage` ,我们需要重点关注一下 +参数 *image* 传递的 raster 对象 + +```{r,fig.cap="raster 图像",dev.args=list(bg = "thistle"),warning=FALSE,fig.asp=1,out.width="50%",fig.width=30/7} +plot(c(100, 250), c(300, 450), type = "n", xlab = "", ylab = "") +image <- as.raster(matrix(0:1, ncol = 5, nrow = 3)) +rasterImage(image, 100, 300, 150, 350, interpolate = FALSE) +rasterImage(image, 100, 400, 150, 450) +rasterImage(image, 200, 300, 200 + xinch(.5), 300 + yinch(.3), + interpolate = FALSE +) +rasterImage(image, 200, 400, 250, 450, + angle = 15, interpolate = FALSE) +``` + +```{r} +library(raster) +meuse.test <- raster(x = system.file("external/test.grd", package="raster")) +class(meuse.test) +``` + + +```{r,fig.cap="raster 对象",fig.asp=.8} +plot(meuse.test, legend = F) +``` + +### stars {#subsec-stars} + +Edzer Pebesma 开发了 stars 包 + +```{r} +# https://resources.rstudio.com/rstudio-conf-2019/spatial-data-science-in-the-tidyverse +library(abind) +library(sf) +library(cubelyr) +library(stars) +x <- read_stars(system.file("tif/L7_ETMs.tif", package = "stars")) + +ggplot() + + geom_stars(data = x) + + coord_equal() + + facet_wrap(~band) + + theme_bw() + + scale_fill_viridis_c() + + scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) +``` + + +## 可视化 {#sec-viz-echarts4r} + +### 斐济地震带分布 {#subsec-fiji-quakes} + +相比于 **plotly**,**echarts4r** 更加轻量,这得益于 JavaScript 库 [Apache ECharts](https://github.com/apache/echarts)。 +前者 MIT 协议,后者采用 Apache-2.0 协议,都可以商用。Apache ECharts 是 Apache 旗下顶级开源项目,由百度前端技术团队贡献,中文文档也比较全,学习起来门槛会低一些。 + +```{r fiji-quakes-echarts4r, fig.cap="斐济地震带", eval=knitr::is_html_output()} +library(echarts4r) +quakes |> + e_charts(x = long) |> + e_geo( + roam = TRUE, + boundingCoords = list( + c(185, -10), + c(165, -40) + ) + ) |> + e_scatter( + serie = lat, + size = mag, # 点的大小映射到震级 + # legend = F, # 是否移除图例 + name = "斐济地震带", + coord_system = "geo" + ) |> + e_visual_map( + serie = mag, scale = e_scale, + inRange = list(color = terrain.colors(10)) + ) |> + e_tooltip() +``` + + + +### 美国各城镇失业率 {#subsec-usa-unemp} + +```{r unemploymentGG,fig.cap="2009年美国各城镇失业率"} +# 数据来源 https://datasets.flowingdata.com/unemployment09.csv +unemp <- read.csv( + file = "http://datasets.flowingdata.com/unemployment09.csv", + header = FALSE, stringsAsFactors = FALSE +) +names(unemp) <- c( + "id", "state_fips", "county_fips", "name", "year", + "?", "?", "?", "rate" +) +unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name)) +unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name) + +county_df <- map_data("county") +names(county_df) <- c("long", "lat", "group", "order", "state_name", "county") +county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))] +county_df$state_name <- NULL + +state_df <- map_data("state") +# Combine together +choropleth <- merge(county_df, unemp, by = c("state", "county")) +choropleth <- choropleth[order(choropleth$order), ] +choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35)) + +library(ggthemes) +ggplot(choropleth, aes(long, lat, group = group)) + + geom_polygon(aes(fill = rate_d), colour = alpha("white", 1 / 4), size = 0.2) + + geom_polygon(data = state_df, colour = "white", fill = NA) + + scale_fill_brewer(palette = "PuRd") + + labs( + fill = "ratio", title = "ratio of unemployment by county, 2009", + caption = "data source: http://datasets.flowingdata.com/unemployment09.csv" + ) + + coord_map("polyconic") + + theme_map() +``` + + +```{r} +# 来自帮助文档 ?map +library(mapproj) # mapproj is used for projection="polyconic" +# color US county map by 2009 unemployment rate +# match counties to map using FIPS county codes +# Based on J's solution to the "Choropleth Challenge" +# http://blog.revolutionanalytics.com/2009/11/choropleth-challenge-result.html + +# load data +# unemp includes data for some counties not on the "lower 48 states" county +# map, such as those in Alaska, Hawaii, Puerto Rico, and some tiny Virginia cities +data(unemp) +data(county.fips) + +# define color buckets +colors <- c("#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043") +unemp$colorBuckets <- as.numeric(cut(unemp$unemp, c(0, 2, 4, 6, 8, 10, 100))) +leg.txt <- c("<2%", "2-4%", "4-6%", "6-8%", "8-10%", ">10%") + +# align data with map definitions by (partial) matching state,county +# names, which include multiple polygons for some counties +cnty.fips <- county.fips$fips[match( + map("county", plot = FALSE)$names, + county.fips$polyname +)] +colorsmatched <- unemp$colorBuckets[match(cnty.fips, unemp$fips)] + +# draw map +map("county", + col = colors[colorsmatched], fill = TRUE, resolution = 0, + lty = 0, projection = "polyconic" +) +map("state", + col = "white", fill = FALSE, add = TRUE, lty = 1, lwd = 0.2, + projection = "polyconic" +) +title("unemployment by county, 2009") +legend("topright", leg.txt, horiz = TRUE, fill = colors) +``` + +美国各地区失业率地图,配不同颜色, [colormap](https://github.com/bhaskarvk/colormap) 适合给静态图配色 diff --git a/spatio-temporal-data.Rmd b/spatio-temporal-data.Rmd new file mode 100644 index 000000000..f04db7dc9 --- /dev/null +++ b/spatio-temporal-data.Rmd @@ -0,0 +1,6 @@ +# (PART) 时空数据 {-} + +# 介绍 {#chap-spatio-temporal-data .unnumbered} + +数据建模 + diff --git a/statistical-foundations.Rmd b/statistical-foundations.Rmd new file mode 100644 index 000000000..fe163b7ec --- /dev/null +++ b/statistical-foundations.Rmd @@ -0,0 +1,5 @@ +# (PART) 统计基础 {-} + +# 介绍 {#chap-statistical-foundations .unnumbered} + +统计基础 diff --git a/statistical-graphics.Rmd b/statistical-graphics.Rmd new file mode 100644 index 000000000..15d26700f --- /dev/null +++ b/statistical-graphics.Rmd @@ -0,0 +1,5 @@ +# (PART) 统计图形 {-} + +# 介绍 {#chap-statistical-graphics .unnumbered} + +统计图形 diff --git a/statistical-models.Rmd b/statistical-models.Rmd new file mode 100644 index 000000000..afe241087 --- /dev/null +++ b/statistical-models.Rmd @@ -0,0 +1,6 @@ +# (PART) 统计模型 {-} + +# 介绍 {#chap-statistical-models .unnumbered} + +统计模型 + diff --git a/string-operations.Rmd b/string-operations.Rmd index 069c4994f..dc0051c6b 100644 --- a/string-operations.Rmd +++ b/string-operations.Rmd @@ -744,6 +744,21 @@ shopping_list str_replace(shopping_list, pattern = "\\\\d", replace = "aa") ``` +### tolower + +tolower 和 toupper 是一对,将大写转小写,小写转大写 + +```{r} +simpleCap <- function(x) { + x <- tolower(x) + s <- strsplit(x, " ")[[1]] + paste(toupper(substring(s, 1, 1)), substring(s, 2), + sep = "", collapse = " " + ) +} +# 参考文献条目里需要将每个英文单词的首字母大写 +simpleCap(x = "THE USE OF MULTIPLE MEASUREMENTS IN TAXONOMIC PROBLEMS") +``` ## 字符串加密 {#encode-string} diff --git a/style.css b/style.css index 8447254e0..124d0cde9 100644 --- a/style.css +++ b/style.css @@ -1,3 +1,7 @@ +.title { + color: firebrick; +} + .caption { color: #777; margin-top: 10px; diff --git a/symbolic-computation.Rmd b/symbolic-computation.Rmd index 6214ee246..8f1187768 100644 --- a/symbolic-computation.Rmd +++ b/symbolic-computation.Rmd @@ -1,6 +1,6 @@ # 符号计算 {#chap-symbolic-computation} -相比于数值计算,符号计算可以无限精度,包括微分、积分运法,求解线性、非线性方程(组),常微分、偏微分方程(组)等,R 自带几个函数如 `deriv()`、`D()` 等可以做一些简单的微分运算,扩展包 [Ryacas](https://github.com/r-cas/ryacas) 提供 [Yacas](https://github.com/grzegorzmazur/yacas/) 核心计算引擎,[symengine](https://github.com/symengine/symengine.R) 引入 C++ 符号计算库[SymEngine](https://github.com/symengine/),相比于 **Ryacas**,**symengine** 不会和 Base R 函数冲突。Python 的符号计算模块 [sympy](https://github.com/sympy/sympy) [@SymPy] 不仅支持简单的四则运算,还支持微分、积分、解方程等,详见官方文档 。 +相比于数值计算,符号计算可以无限精度,包括微分、积分运法,求解线性、非线性方程(组),常微分、偏微分方程(组)等,R 自带几个函数如 `deriv()`、`D()` 等可以做一些简单的微分运算,扩展包 [Ryacas](https://github.com/r-cas/ryacas) 提供 [Yacas](https://github.com/grzegorzmazur/yacas/) 核心计算引擎,[symengine](https://github.com/symengine/symengine.R) 引入 C++ 符号计算库[SymEngine](https://github.com/symengine/),相比于 **Ryacas**[@Ryacas],**symengine** 不会和 Base R 函数冲突。Python 的符号计算模块 [sympy](https://github.com/sympy/sympy) [@SymPy] 不仅支持简单的四则运算,还支持微分、积分、解方程等,详见官方文档 。 16年在统计之都灌水[符号计算与R语言](https://cosx.org/2016/07/r-symbol-calculate),相应的 Rmd 源文件放在[Github](https://github.com/XiangyunHuang/Symbolic-Computing)上。 diff --git a/text-analysis.Rmd b/text-analysis.Rmd index 93191cf51..e3cf0ebb2 100644 --- a/text-analysis.Rmd +++ b/text-analysis.Rmd @@ -1,7 +1,10 @@ # 文本分析 {#chap-text-analysis} +[Supervised Machine Learning for Text Analysis in R](https://smltar.com/) 和 [Tidy Text Mining with R](https://www.tidytextmining.com/) + [PDFR](https://github.com/AllanCameron/PDFR) 和 [pdftools](https://github.com/ropensci/pdftools) 从 PDF 文档抽取文本, [tesseract](https://github.com/ropensci/tesseract) 从扫描件中抽取文本 +[quanteda](https://github.com/quanteda/quanteda) [fastTextR](https://github.com/FlorianSchwendinger/fastTextR) diff --git a/web-documents.Rmd b/web-documents.Rmd new file mode 100644 index 000000000..c01ae99dd --- /dev/null +++ b/web-documents.Rmd @@ -0,0 +1,93 @@ +# 网页文档 {#chap-web-documents} + +丘怡轩开发的 [prettydoc](https://github.com/yixuan/prettydoc) 包提供了一系列模版,方便快速提高网页逼格。另有 Atsushi Yasumoto 开发的 [minidown](https://github.com/atusy/minidown) 包非常轻量,但是常用功能都覆盖了。 + +## 幻灯片 {#sec-slides} + +谢益辉开发的 [xaringan](https://github.com/yihui/xaringan) 用于制作网页幻灯片, +[xaringanthemer](https://github.com/gadenbuie/xaringanthemer) 为 xaringan 提供主题定制, +[xaringanExtra](https://github.com/gadenbuie/xaringanExtra) 在 xaringan 之上提供各种功能扩展, +[xaringanBuilder](https://github.com/jhelvy/xaringanBuilder) 为 xaringan 提供多种输出格式。 + + +## 电子邮件 {#sec-emails} + +[^blastula-group-emails]: + +[emayili](https://github.com/datawookie/emayili) 是非常轻量的实现邮件发送的 R 包,其它功能类似的 R 包有 [blastula](https://github.com/rich-iannone/blastula) [mailR](https://github.com/rpremraj/mailR)。Rahul Premraj 基于 rJava 开发的 [mailR](https://github.com/rpremraj/mailR) 虽然还未在 CRAN 上正式发布,但是已得到很多人的关注,也被广泛的使用,目前作者已经不维护了,继续使用有一定风险。 RStudio 公司 Richard Iannone 新开发的 [blastula](https://github.com/rich-iannone/blastula) 扔掉了 Java 的重依赖,更加轻量化、现代化,支持发送群组邮件[^blastula-group-emails]。 [curl](https://github.com/jeroen/curl) 包提供的函数 `send_mail()` 本质上是在利用 [curl](https://curl.haxx.se/) 软件发送邮件,举个例子,邮件内容如下: + +``` +From: "黄湘云" <邮箱地址> +To: "黄湘云" <邮箱地址> +Subject: 测试邮件 + +你好: + +这是一封测试邮件! +``` + +将邮件内容保存为 mail.txt 文件,然后使用 curl 命令行工具将邮件内容发出去。 + +```{bash, eval=FALSE} +curl --url 'smtp://公司邮件服务器地址:开放的端口号' \ + --ssl-reqd --mail-from '发件人邮箱地址' \ + --mail-rcpt '收件人邮箱地址' \ + --upload-file data/mail.txt \ + --user '发件人邮箱地址:邮箱登陆密码' +``` + +::: {.rmdnote data-latex="{注意}"} +Gmail 出于安全性考虑,不支持这种发送邮件的方式,会将邮件内容阻挡,进而接收不到邮件。 +::: + +下面以 blastula 包为例怎么支持 Gmail/Outlook/QQ 等邮件发送,先安装系统软件依赖,CentOS 8 上安装依赖 + +```bash +sudo dnf install -y libsecret-devel libsodium-devel +``` + +然后安装 [**keyring**]() 和 [**blastula**]() + +```{r, eval=FALSE} +install.packages(c("keyring", "blastula")) +``` + +接着配置邮件帐户,这一步需要邮件账户名和登陆密码,配置一次就够了,不需要每次发送邮件的时候都配置一次 + +```{r, eval=FALSE} +library(blastula) +create_smtp_creds_key( + id = "outlook", + user = "xiangyunfaith@outlook.com", + provider = "outlook" +) +``` + +第二步,准备邮件内容,包括邮件主题、发件人、收件人、抄送人、密送人、邮件主体和附件等。 + +```{r, eval=FALSE} +attachment <- "data/mail.txt" # 如果没有附件,引号内留空即可。 +# 这个Rmd文件渲染后就是邮件的正文,交互图形和交互表格不适用 +body <- "examples/html-document.Rmd" +# 渲染邮件内容,生成预览 +email <- render_email(body) |> + add_attachment(file = attachment) +email +``` + +最后,发送邮件 + +```{r, eval=FALSE} +smtp_send( + from = c("张三" = "xxx@outlook.com"), # 发件人 + to = c("李四" = "xxx@foxmail.com", + "王五" = "xxx@gmail.com"), # 收件人 + cc = c("赵六" = "xxx@outlook.com"), # 抄送人 + subject = "这是一封测试邮件", + email = email, + credentials = creds_key(id = "outlook") +) +``` + +密送人实现群发单显,即一封邮件同时发送给多个人,每个收件人只能看到发件人地址而看不到其它收件人地址。 +