@@ -9,8 +9,8 @@ vignette: >
9
9
10
10
``` {r, include = FALSE}
11
11
knitr::opts_chunk$set(
12
- collapse = TRUE,
13
- comment = "#>"
12
+ collapse = TRUE,
13
+ comment = "#>"
14
14
)
15
15
```
16
16
@@ -106,35 +106,35 @@ ada_url_parse("https://user_1:
[email protected] :8080/dir/../api?q=1#frag")
106
106
The function can deal with punycode and percent encoding, and it generally handles all kinds of edge cases well.
107
107
``` {r ada_corner}
108
108
corner_cases <- c(
109
- "https://example.com:8080", "http://user:[email protected] ",
110
- "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080", "https://example.com/path/to/resource?query=value&another=thing#fragment",
111
- "http://sub.sub.example.com", "ftp://files.example.com:2121/download/file.txt",
112
- "http://example.com/path with spaces/and&special=characters?",
113
- "https://user:pa%[email protected] /path", "http://example.com/..//a/b/../c/./d.html",
114
- "https://example.com:8080/over/under?query=param#and-a-fragment",
115
- "http://192.168.0.1/path/to/resource", "http://3com.com/path/to/resource",
116
- "http://example.com/%7Eusername/", "https://example.com/a?query=value&query=value2",
117
- "https://example.com/a/b/c/..", "ws://websocket.example.com:9000/chat",
118
- "https://example.com:65535/edge-case-port", "file:///home/user/file.txt",
119
- "http://example.com/a/b/c/%2F%2F", "http://example.com/a/../a/../a/../a/",
120
- "https://example.com/./././a/", "http://example.com:8080/a;b?c=d#e",
121
- "http://@example.com", "http://example.com/@test", "http://example.com/@@@/a/b",
122
- "https://example.com:0/", "http://example.com/%25path%20with%20encoded%20chars",
123
- "https://example.com/path?query=%26%3D%3F%23", "http://example.com:8080/?query=value#fragment#fragment2",
124
- "https://example.xn--80akhbyknj4f/path/to/resource", "https://example.co.uk/path/to/resource",
125
- "http://username:pass%[email protected] ", "ftp://downloads.example.edu:3030/files/archive.zip",
126
- "https://example.com:8080/this/is/a/deeply/nested/path/to/a/resource",
127
- "http://another-example.com/..//test/./demo.html", "https://sub2.sub1.example.org:5000/login?user=test#section2",
128
- "ws://chat.example.biz:5050/livechat", "http://192.168.1.100/a/b/c/d",
129
- "https://secure.example.shop/cart?item=123&quantity=5", "http://example.travel/%60%21%40%23%24%25%5E%26*()",
130
- "https://example.museum/path/to/artifact?search=ancient", "ftp://secure-files.example.co:4040/files/document.docx",
131
- "https://test.example.aero/booking?flight=abc123", "http://example.asia/%E2%82%AC%E2%82%AC/path",
132
- "http://subdomain.example.tel/contact?name=john", "ws://game-server.example.jobs:2020/match?id=xyz",
133
- "http://example.mobi/path/with/mobile/content", "https://example.name/family/tree?name=smith",
134
- "http://192.168.2.2/path?query1=value1&query2=value2", "http://example.pro/professional/services",
135
- "https://example.info/information/page", "http://example.int/internal/systems/login",
136
- "https://example.post/postal/services", "http://example.xxx/age/verification",
137
- "https://example.xxx/another/edge/case/path?with=query#and-fragment"
109
+ "https://example.com:8080", "http://user:[email protected] ",
110
+ "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080", "https://example.com/path/to/resource?query=value&another=thing#fragment",
111
+ "http://sub.sub.example.com", "ftp://files.example.com:2121/download/file.txt",
112
+ "http://example.com/path with spaces/and&special=characters?",
113
+ "https://user:pa%[email protected] /path", "http://example.com/..//a/b/../c/./d.html",
114
+ "https://example.com:8080/over/under?query=param#and-a-fragment",
115
+ "http://192.168.0.1/path/to/resource", "http://3com.com/path/to/resource",
116
+ "http://example.com/%7Eusername/", "https://example.com/a?query=value&query=value2",
117
+ "https://example.com/a/b/c/..", "ws://websocket.example.com:9000/chat",
118
+ "https://example.com:65535/edge-case-port", "file:///home/user/file.txt",
119
+ "http://example.com/a/b/c/%2F%2F", "http://example.com/a/../a/../a/../a/",
120
+ "https://example.com/./././a/", "http://example.com:8080/a;b?c=d#e",
121
+ "http://@example.com", "http://example.com/@test", "http://example.com/@@@/a/b",
122
+ "https://example.com:0/", "http://example.com/%25path%20with%20encoded%20chars",
123
+ "https://example.com/path?query=%26%3D%3F%23", "http://example.com:8080/?query=value#fragment#fragment2",
124
+ "https://example.xn--80akhbyknj4f/path/to/resource", "https://example.co.uk/path/to/resource",
125
+ "http://username:pass%[email protected] ", "ftp://downloads.example.edu:3030/files/archive.zip",
126
+ "https://example.com:8080/this/is/a/deeply/nested/path/to/a/resource",
127
+ "http://another-example.com/..//test/./demo.html", "https://sub2.sub1.example.org:5000/login?user=test#section2",
128
+ "ws://chat.example.biz:5050/livechat", "http://192.168.1.100/a/b/c/d",
129
+ "https://secure.example.shop/cart?item=123&quantity=5", "http://example.travel/%60%21%40%23%24%25%5E%26*()",
130
+ "https://example.museum/path/to/artifact?search=ancient", "ftp://secure-files.example.co:4040/files/document.docx",
131
+ "https://test.example.aero/booking?flight=abc123", "http://example.asia/%E2%82%AC%E2%82%AC/path",
132
+ "http://subdomain.example.tel/contact?name=john", "ws://game-server.example.jobs:2020/match?id=xyz",
133
+ "http://example.mobi/path/with/mobile/content", "https://example.name/family/tree?name=smith",
134
+ "http://192.168.2.2/path?query1=value1&query2=value2", "http://example.pro/professional/services",
135
+ "https://example.info/information/page", "http://example.int/internal/systems/login",
136
+ "https://example.post/postal/services", "http://example.xxx/age/verification",
137
+ "https://example.xxx/another/edge/case/path?with=query#and-fragment"
138
138
)
139
139
140
140
df <- ada_url_parse(corner_cases)
@@ -153,14 +153,16 @@ ada_has_search(corner_cases)
153
153
154
154
## Public suffix extraction
155
155
156
- The package also implements a public suffix extractor ` public_suffix() ` , based on a
157
- lookup of the [ Public Suffix List] ( https://publicsuffix.org/ ) (* The list also includes private top level domains, which we excluded from this function* ).
156
+ The package also implements a public suffix extractor `public_suffix()`, based on a lookup of the [Public Suffix List](https://publicsuffix.org/).
157
+ Note that from this list, we only include registry suffixes (e.g., com, co.uk), which are those controlled by a domain name registry and governed by ICANN.
158
+ We do not include "private" suffixes (e.g., blogspot.com) that allow people to register subdomains. Hence, we use the term domain in the sense of "top domain under a registry suffix".
159
+ See <https://github.com/google/guava/wiki/InternetDomainNameExplained> for more details.
158
160
159
161
``` {r public_suffix}
160
162
urls <- c(
161
- "https://subsub.sub.domain.co.uk",
162
- "https://domain.api.gov.uk",
163
- "https://thisisnotpart.butthisispartoftheps.kawasaki.jp"
163
+ "https://subsub.sub.domain.co.uk",
164
+ "https://domain.api.gov.uk",
165
+ "https://thisisnotpart.butthisispartoftheps.kawasaki.jp"
164
166
)
165
167
public_suffix(urls)
166
168
```
0 commit comments