Skip to content

Commit 20f0b6a

Browse files
committed
update of public_suffix part
1 parent b6398fa commit 20f0b6a

File tree

1 file changed

+38
-36
lines changed

1 file changed

+38
-36
lines changed

vignettes/adaR.Rmd

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ vignette: >
99

1010
```{r, include = FALSE}
1111
knitr::opts_chunk$set(
12-
collapse = TRUE,
13-
comment = "#>"
12+
collapse = TRUE,
13+
comment = "#>"
1414
)
1515
```
1616

@@ -106,35 +106,35 @@ ada_url_parse("https://user_1:[email protected]:8080/dir/../api?q=1#frag")
106106
The function can deal with punycode and percent encoding and does generally handle all types of edge cases well.
107107
```{r ada_corner}
108108
corner_cases <- c(
109-
"https://example.com:8080", "http://user:[email protected]",
110-
"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080", "https://example.com/path/to/resource?query=value&another=thing#fragment",
111-
"http://sub.sub.example.com", "ftp://files.example.com:2121/download/file.txt",
112-
"http://example.com/path with spaces/and&special=characters?",
113-
"https://user:pa%[email protected]/path", "http://example.com/..//a/b/../c/./d.html",
114-
"https://example.com:8080/over/under?query=param#and-a-fragment",
115-
"http://192.168.0.1/path/to/resource", "http://3com.com/path/to/resource",
116-
"http://example.com/%7Eusername/", "https://example.com/a?query=value&query=value2",
117-
"https://example.com/a/b/c/..", "ws://websocket.example.com:9000/chat",
118-
"https://example.com:65535/edge-case-port", "file:///home/user/file.txt",
119-
"http://example.com/a/b/c/%2F%2F", "http://example.com/a/../a/../a/../a/",
120-
"https://example.com/./././a/", "http://example.com:8080/a;b?c=d#e",
121-
"http://@example.com", "http://example.com/@test", "http://example.com/@@@/a/b",
122-
"https://example.com:0/", "http://example.com/%25path%20with%20encoded%20chars",
123-
"https://example.com/path?query=%26%3D%3F%23", "http://example.com:8080/?query=value#fragment#fragment2",
124-
"https://example.xn--80akhbyknj4f/path/to/resource", "https://example.co.uk/path/to/resource",
125-
"http://username:pass%[email protected]", "ftp://downloads.example.edu:3030/files/archive.zip",
126-
"https://example.com:8080/this/is/a/deeply/nested/path/to/a/resource",
127-
"http://another-example.com/..//test/./demo.html", "https://sub2.sub1.example.org:5000/login?user=test#section2",
128-
"ws://chat.example.biz:5050/livechat", "http://192.168.1.100/a/b/c/d",
129-
"https://secure.example.shop/cart?item=123&quantity=5", "http://example.travel/%60%21%40%23%24%25%5E%26*()",
130-
"https://example.museum/path/to/artifact?search=ancient", "ftp://secure-files.example.co:4040/files/document.docx",
131-
"https://test.example.aero/booking?flight=abc123", "http://example.asia/%E2%82%AC%E2%82%AC/path",
132-
"http://subdomain.example.tel/contact?name=john", "ws://game-server.example.jobs:2020/match?id=xyz",
133-
"http://example.mobi/path/with/mobile/content", "https://example.name/family/tree?name=smith",
134-
"http://192.168.2.2/path?query1=value1&query2=value2", "http://example.pro/professional/services",
135-
"https://example.info/information/page", "http://example.int/internal/systems/login",
136-
"https://example.post/postal/services", "http://example.xxx/age/verification",
137-
"https://example.xxx/another/edge/case/path?with=query#and-fragment"
109+
"https://example.com:8080", "http://user:[email protected]",
110+
"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080", "https://example.com/path/to/resource?query=value&another=thing#fragment",
111+
"http://sub.sub.example.com", "ftp://files.example.com:2121/download/file.txt",
112+
"http://example.com/path with spaces/and&special=characters?",
113+
"https://user:pa%[email protected]/path", "http://example.com/..//a/b/../c/./d.html",
114+
"https://example.com:8080/over/under?query=param#and-a-fragment",
115+
"http://192.168.0.1/path/to/resource", "http://3com.com/path/to/resource",
116+
"http://example.com/%7Eusername/", "https://example.com/a?query=value&query=value2",
117+
"https://example.com/a/b/c/..", "ws://websocket.example.com:9000/chat",
118+
"https://example.com:65535/edge-case-port", "file:///home/user/file.txt",
119+
"http://example.com/a/b/c/%2F%2F", "http://example.com/a/../a/../a/../a/",
120+
"https://example.com/./././a/", "http://example.com:8080/a;b?c=d#e",
121+
"http://@example.com", "http://example.com/@test", "http://example.com/@@@/a/b",
122+
"https://example.com:0/", "http://example.com/%25path%20with%20encoded%20chars",
123+
"https://example.com/path?query=%26%3D%3F%23", "http://example.com:8080/?query=value#fragment#fragment2",
124+
"https://example.xn--80akhbyknj4f/path/to/resource", "https://example.co.uk/path/to/resource",
125+
"http://username:pass%[email protected]", "ftp://downloads.example.edu:3030/files/archive.zip",
126+
"https://example.com:8080/this/is/a/deeply/nested/path/to/a/resource",
127+
"http://another-example.com/..//test/./demo.html", "https://sub2.sub1.example.org:5000/login?user=test#section2",
128+
"ws://chat.example.biz:5050/livechat", "http://192.168.1.100/a/b/c/d",
129+
"https://secure.example.shop/cart?item=123&quantity=5", "http://example.travel/%60%21%40%23%24%25%5E%26*()",
130+
"https://example.museum/path/to/artifact?search=ancient", "ftp://secure-files.example.co:4040/files/document.docx",
131+
"https://test.example.aero/booking?flight=abc123", "http://example.asia/%E2%82%AC%E2%82%AC/path",
132+
"http://subdomain.example.tel/contact?name=john", "ws://game-server.example.jobs:2020/match?id=xyz",
133+
"http://example.mobi/path/with/mobile/content", "https://example.name/family/tree?name=smith",
134+
"http://192.168.2.2/path?query1=value1&query2=value2", "http://example.pro/professional/services",
135+
"https://example.info/information/page", "http://example.int/internal/systems/login",
136+
"https://example.post/postal/services", "http://example.xxx/age/verification",
137+
"https://example.xxx/another/edge/case/path?with=query#and-fragment"
138138
)
139139
140140
df <- ada_url_parse(corner_cases)
@@ -153,14 +153,16 @@ ada_has_search(corner_cases)
153153

154154
## Public suffix extraction
155155

156-
The package also implements a public suffix extractor `public_suffix()`, based on a
157-
lookup of the [Public Suffix List](https://publicsuffix.org/) (*The list also includes private top level domains, which we excluded from this function*).
156+
The package also implements a public suffix extractor, `public_suffix()`, based on a lookup of the [Public Suffix List](https://publicsuffix.org/).
157+
Note that from this list, we only include registry suffixes (e.g., com, co.uk), which are those controlled by a domain name registry and governed by ICANN.
158+
We do not include "private" suffixes (e.g., blogspot.com) that allow people to register subdomains. Hence, we use the term domain in the sense of "top domain under a registry suffix".
159+
See <https://github.com/google/guava/wiki/InternetDomainNameExplained> for more details.
158160

159161
```{r public_suffix}
160162
urls <- c(
161-
"https://subsub.sub.domain.co.uk",
162-
"https://domain.api.gov.uk",
163-
"https://thisisnotpart.butthisispartoftheps.kawasaki.jp"
163+
"https://subsub.sub.domain.co.uk",
164+
"https://domain.api.gov.uk",
165+
"https://thisisnotpart.butthisispartoftheps.kawasaki.jp"
164166
)
165167
public_suffix(urls)
166168
```

0 commit comments

Comments
 (0)