Skip to content

Commit f5a188b

Browse files
Merge pull request #35 from creative-commoners/pulls/2/php81
ENH PHP 8.1 compatibility
2 parents 0b3496f + 3e9308f commit f5a188b

File tree

14 files changed

+147 -147 lines changed

14 files changed

+147 -147 lines changed

code/HTML5Value.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ function ($no, $str) {
1919

2020
// Use HTML5 parser to parse the HTML fragment
2121
try {
22-
$content = str_replace("\r\n", "\n", $content);
22+
$content = str_replace("\r\n", "\n", $content ?? '');
2323
$parserPath = implode(
2424
DIRECTORY_SEPARATOR,
2525
[

thirdparty/html5lib-php/bin/html5-parse.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@
66
} else {
77
$file = $argv[1];
88
}
9-
$result = HTML5_Parser::parse(file_get_contents($file));
9+
$result = HTML5_Parser::parse(file_get_contents($file ?? ''));
1010
// nop

thirdparty/html5lib-php/library/HTML5/Data.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public static function getRealCodepoint($ref) {
6363
public static function getNamedCharacterReferences() {
6464
if (!self::$namedCharacterReferences) {
6565
self::$namedCharacterReferences = unserialize(
66-
file_get_contents(dirname(__FILE__) . '/named-character-references.ser'));
66+
file_get_contents(dirname(__FILE__) . '/named-character-references.ser') ?? '');
6767
}
6868
return self::$namedCharacterReferences;
6969
}
@@ -103,10 +103,10 @@ public static function utf8chr($code) {
103103
}
104104
// set up the actual character
105105
$ret = '';
106-
if($w) $ret .= chr($w);
107-
if($z) $ret .= chr($z);
108-
if($y) $ret .= chr($y);
109-
$ret .= chr($x);
106+
if($w) $ret .= chr($w ?? 0);
107+
if($z) $ret .= chr($z ?? 0);
108+
if($y) $ret .= chr($y ?? 0);
109+
$ret .= chr($x ?? 0);
110110

111111
return $ret;
112112
}

thirdparty/html5lib-php/library/HTML5/InputStream.php

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,16 @@ public function __construct($data) {
7373
// omitted.
7474
if (extension_loaded('iconv')) {
7575
// non-conforming
76-
$data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
76+
$data = @iconv('UTF-8', 'UTF-8//IGNORE', $data ?? '');
7777
} else {
7878
// we can make a conforming native implementation
7979
throw new Exception('Not implemented, please install mbstring or iconv');
8080
}
8181

8282
/* One leading U+FEFF BYTE ORDER MARK character must be
8383
ignored if any are present. */
84-
if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
85-
$data = substr($data, 3);
84+
if (substr($data ?? '', 0, 3) === "\xEF\xBB\xBF") {
85+
$data = substr($data ?? '', 3);
8686
}
8787

8888
/* All U+0000 NULL characters in the input must be replaced
@@ -112,7 +112,7 @@ public function __construct($data) {
112112
"\n",
113113
"\n"
114114
),
115-
$data
115+
$data ?? ''
116116
);
117117

118118
/* Any occurrences of any characters in the ranges U+0001 to
@@ -141,7 +141,7 @@ public function __construct($data) {
141141
|
142142
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
143143
)/x',
144-
$data,
144+
$data ?? '',
145145
$matches
146146
);
147147
for ($i = 0; $i < $count; $i++) {
@@ -156,7 +156,7 @@ public function __construct($data) {
156156

157157
$this->data = $data;
158158
$this->char = 0;
159-
$this->EOF = strlen($data);
159+
$this->EOF = strlen($data ?? '');
160160
}
161161

162162
/**
@@ -167,7 +167,7 @@ public function getCurrentLine() {
167167
if($this->EOF) {
168168
// Add one to $this->char because we want the number for the next
169169
// byte to be processed.
170-
return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
170+
return substr_count($this->data ?? '', "\n", 0, min($this->char, $this->EOF)) + 1;
171171
} else {
172172
// If the string is empty, we are on the first line (sorta).
173173
return 1;
@@ -183,29 +183,29 @@ public function getColumnOffset() {
183183
// one (to make it point to the next character, the one we want the
184184
// position of) added to it because strrpos's behaviour includes the
185185
// final offset byte.
186-
$lastLine = strrpos($this->data, "\n", $this->char - 1 - strlen($this->data));
186+
$lastLine = strrpos($this->data ?? '', "\n", $this->char - 1 - strlen($this->data ?? ''));
187187

188188
// However, for here we want the length up until the next byte to be
189189
// processed, so add one to the current byte ($this->char).
190190
if($lastLine !== false) {
191-
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
191+
$findLengthOf = substr($this->data ?? '', $lastLine + 1, $this->char - 1 - $lastLine);
192192
} else {
193-
$findLengthOf = substr($this->data, 0, $this->char);
193+
$findLengthOf = substr($this->data ?? '', 0, $this->char);
194194
}
195195

196196
// Get the length for the string we need.
197197
if(extension_loaded('iconv')) {
198-
return iconv_strlen($findLengthOf, 'utf-8');
198+
return iconv_strlen($findLengthOf ?? '', 'utf-8');
199199
} elseif(extension_loaded('mbstring')) {
200-
return mb_strlen($findLengthOf, 'utf-8');
200+
return mb_strlen($findLengthOf ?? '', 'utf-8');
201201
} elseif(extension_loaded('xml')) {
202-
return strlen(utf8_decode($findLengthOf));
202+
return strlen(utf8_decode($findLengthOf ?? ''));
203203
} else {
204-
$count = count_chars($findLengthOf);
204+
$count = count_chars($findLengthOf ?? '');
205205
// 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
206206
// 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range)
207-
return array_sum(array_slice($count, 0, 0x80)) +
208-
array_sum(array_slice($count, 0xC2, 0x33));
207+
return array_sum(array_slice($count ?? [], 0, 0x80)) +
208+
array_sum(array_slice($count ?? [], 0xC2, 0x33));
209209
}
210210
}
211211

@@ -225,7 +225,7 @@ public function char() {
225225
*/
226226
public function remainingChars() {
227227
if($this->char < $this->EOF) {
228-
$data = substr($this->data, $this->char);
228+
$data = substr($this->data ?? '', $this->char ?? 0);
229229
$this->char = $this->EOF;
230230
return $data;
231231
} else {
@@ -241,11 +241,11 @@ public function remainingChars() {
241241
public function charsUntil($bytes, $max = null) {
242242
if ($this->char < $this->EOF) {
243243
if ($max === 0 || $max) {
244-
$len = strcspn($this->data, $bytes, $this->char, $max);
244+
$len = strcspn($this->data ?? '', $bytes ?? '', $this->char ?? 0, $max);
245245
} else {
246-
$len = strcspn($this->data, $bytes, $this->char);
246+
$len = strcspn($this->data ?? '', $bytes ?? '', $this->char ?? 0);
247247
}
248-
$string = (string) substr($this->data, $this->char, $len);
248+
$string = (string) substr($this->data ?? '', $this->char ?? 0, $len);
249249
$this->char += $len;
250250
return $string;
251251
} else {
@@ -261,11 +261,11 @@ public function charsUntil($bytes, $max = null) {
261261
public function charsWhile($bytes, $max = null) {
262262
if ($this->char < $this->EOF) {
263263
if ($max === 0 || $max) {
264-
$len = strspn($this->data, $bytes, $this->char, $max);
264+
$len = strspn($this->data ?? '', $bytes ?? '', $this->char ?? 0, $max);
265265
} else {
266-
$len = strspn($this->data, $bytes, $this->char);
266+
$len = strspn($this->data ?? '', $bytes ?? '', $this->char ?? 0);
267267
}
268-
$string = (string) substr($this->data, $this->char, $len);
268+
$string = (string) substr($this->data ?? '', $this->char ?? 0, $len);
269269
$this->char += $len;
270270
return $string;
271271
} else {

thirdparty/html5lib-php/library/HTML5/Tokenizer.php

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ public function parse() {
135135
/* Consume the next input character */
136136
$char = $this->stream->char();
137137
$lastFourChars .= $char;
138-
if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
138+
if (strlen($lastFourChars ?? '') > 4) $lastFourChars = substr($lastFourChars ?? '', -4);
139139

140140
// see below for meaning
141141
$hyp_cond =
@@ -213,7 +213,7 @@ public function parse() {
213213
} elseif(
214214
$char === '>' &&
215215
$gt_cond &&
216-
substr($lastFourChars, 1) === '-->'
216+
substr($lastFourChars ?? '', 1) === '-->'
217217
) {
218218
/* If the content model flag is set to either the RCDATA state or
219219
the CDATA state, and the escape flag is true, and the last three
@@ -248,7 +248,7 @@ public function parse() {
248248
'data' => $char . $chars
249249
));
250250
$lastFourChars .= $chars;
251-
if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
251+
if (strlen($lastFourChars ?? '') > 4) $lastFourChars = substr($lastFourChars ?? '', -4);
252252

253253
} else {
254254
/* Anything else
@@ -274,7 +274,7 @@ public function parse() {
274274
));
275275

276276
$lastFourChars .= $chars;
277-
if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
277+
if (strlen($lastFourChars ?? '') > 4) $lastFourChars = substr($lastFourChars ?? '', -4);
278278

279279
$state = 'data';
280280
}
@@ -351,7 +351,7 @@ public function parse() {
351351
point), then switch to the tag name state. (Don't emit the token
352352
yet; further details will be filled in before it is emitted.) */
353353
$this->token = array(
354-
'name' => strtolower($char),
354+
'name' => strtolower($char ?? ''),
355355
'type' => self::STARTTAG,
356356
'attr' => array()
357357
);
@@ -427,7 +427,7 @@ public function parse() {
427427
) {
428428
/* If the content model flag is set to the RCDATA or CDATA
429429
states... */
430-
$name = strtolower($this->stream->charsWhile(self::ALPHA));
430+
$name = strtolower($this->stream->charsWhile(self::ALPHA) ?? '');
431431
$following = $this->stream->char();
432432
$this->stream->unget();
433433
if (
@@ -489,7 +489,7 @@ public function parse() {
489489
switch to the tag name state. (Don't emit the token yet; further details
490490
will be filled in before it is emitted.) */
491491
$this->token = array(
492-
'name' => strtolower($char),
492+
'name' => strtolower($char ?? ''),
493493
'type' => self::ENDTAG
494494
);
495495

@@ -635,7 +635,7 @@ public function parse() {
635635
point), and its value to the empty string. Switch to the
636636
attribute name state.*/
637637
$this->token['attr'][] = array(
638-
'name' => strtolower($char),
638+
'name' => strtolower($char ?? ''),
639639
'value' => ''
640640
);
641641

@@ -716,7 +716,7 @@ public function parse() {
716716
state. */
717717
$chars = $this->stream->charsWhile(self::UPPER_ALPHA);
718718

719-
$last = count($this->token['attr']) - 1;
719+
$last = count($this->token['attr'] ?? []) - 1;
720720
$this->token['attr'][$last]['name'] .= strtolower($char . $chars);
721721

722722
$state = 'attribute name';
@@ -750,7 +750,7 @@ public function parse() {
750750
Stay in the attribute name state. */
751751
$chars = $this->stream->charsUntil("\t\n\x0C /=>\"'" . self::UPPER_ALPHA);
752752

753-
$last = count($this->token['attr']) - 1;
753+
$last = count($this->token['attr'] ?? []) - 1;
754754
$this->token['attr'][$last]['name'] .= $char . $chars;
755755

756756
$state = 'attribute name';
@@ -803,7 +803,7 @@ public function parse() {
803803
point), and its value to the empty string. Switch to the
804804
attribute name state. */
805805
$this->token['attr'][] = array(
806-
'name' => strtolower($char),
806+
'name' => strtolower($char ?? ''),
807807
'value' => ''
808808
);
809809

@@ -910,7 +910,7 @@ public function parse() {
910910
/* Anything else
911911
Append the current input character to the current attribute's value.
912912
Switch to the attribute value (unquoted) state. */
913-
$last = count($this->token['attr']) - 1;
913+
$last = count($this->token['attr'] ?? []) - 1;
914914
$this->token['attr'][$last]['value'] .= $char;
915915

916916
$state = 'attribute value (unquoted)';
@@ -950,7 +950,7 @@ public function parse() {
950950
Stay in the attribute value (double-quoted) state. */
951951
$chars = $this->stream->charsUntil('"&');
952952

953-
$last = count($this->token['attr']) - 1;
953+
$last = count($this->token['attr'] ?? []) - 1;
954954
$this->token['attr'][$last]['value'] .= $char . $chars;
955955

956956
$state = 'attribute value (double-quoted)';
@@ -988,7 +988,7 @@ public function parse() {
988988
Stay in the attribute value (single-quoted) state. */
989989
$chars = $this->stream->charsUntil("'&");
990990

991-
$last = count($this->token['attr']) - 1;
991+
$last = count($this->token['attr'] ?? []) - 1;
992992
$this->token['attr'][$last]['value'] .= $char . $chars;
993993

994994
$state = 'attribute value (single-quoted)';
@@ -1049,7 +1049,7 @@ public function parse() {
10491049
Stay in the attribute value (unquoted) state. */
10501050
$chars = $this->stream->charsUntil("\t\n\x0c &>\"'=");
10511051

1052-
$last = count($this->token['attr']) - 1;
1052+
$last = count($this->token['attr'] ?? []) - 1;
10531053
$this->token['attr'][$last]['value'] .= $char . $chars;
10541054

10551055
$state = 'attribute value (unquoted)';
@@ -1179,7 +1179,7 @@ public function parse() {
11791179
/* Otherwise if the next seven characters are a case-insensitive match
11801180
for the word "DOCTYPE", then consume those characters and switch to the
11811181
DOCTYPE state. */
1182-
} elseif(strtoupper($alpha) === 'DOCTYPE') {
1182+
} elseif(strtoupper($alpha ?? '') === 'DOCTYPE') {
11831183
$state = 'DOCTYPE';
11841184

11851185
// XXX not implemented
@@ -1526,7 +1526,7 @@ public function parse() {
15261526
the character's code point). Switch to the DOCTYPE name
15271527
state. */
15281528
$this->token = array(
1529-
'name' => strtolower($char),
1529+
'name' => strtolower($char ?? ''),
15301530
'type' => self::DOCTYPE,
15311531
'error' => true
15321532
);
@@ -1589,7 +1589,7 @@ public function parse() {
15891589
Append the lowercase version of the input character
15901590
(add 0x0020 to the character's code point) to the current
15911591
DOCTYPE token's name. Stay in the DOCTYPE name state. */
1592-
$this->token['name'] .= strtolower($char);
1592+
$this->token['name'] .= strtolower($char ?? '');
15931593

15941594
} elseif($char === false) {
15951595
/* EOF
@@ -2289,7 +2289,7 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
22892289
// alphanumeric so we can just concat it to whatever we get later).
22902290
$this->stream->unget();
22912291
if ($char !== false) {
2292-
$chars = substr($chars, 0, -1);
2292+
$chars = substr($chars ?? '', 0, -1);
22932293
}
22942294

22952295
/* If no match can be made, then this is a parse error.
@@ -2305,7 +2305,7 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
23052305
/* If the last character matched is not a U+003B SEMICOLON
23062306
(;), there is a parse error. */
23072307
$semicolon = true;
2308-
if (substr($id, -1) !== ';') {
2308+
if (substr($id ?? '', -1) !== ';') {
23092309
$this->emitToken(array(
23102310
'type' => self::PARSEERROR,
23112311
'data' => 'named-entity-without-semicolon'
@@ -2324,8 +2324,8 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
23242324
and nothing is returned. */
23252325
if ($inattr && !$semicolon) {
23262326
// The next character is either the next character in $chars or in the stream.
2327-
if (strlen($chars) > strlen($id)) {
2328-
$next = substr($chars, strlen($id), 1);
2327+
if (strlen($chars ?? '') > strlen($id ?? '')) {
2328+
$next = substr($chars ?? '', strlen($id ?? ''), 1);
23292329
} else {
23302330
$next = $this->stream->char();
23312331
$this->stream->unget();
@@ -2342,7 +2342,7 @@ private function consumeCharacterReference($allowed = false, $inattr = false) {
23422342
/* Otherwise, return a character token for the character
23432343
corresponding to the character reference name (as given
23442344
by the second column of the named character references table). */
2345-
return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id));
2345+
return HTML5_Data::utf8chr($codepoint) . substr($chars ?? '', strlen($id ?? ''));
23462346
}
23472347
}
23482348

@@ -2359,7 +2359,7 @@ private function characterReferenceInAttributeValue($allowed = false) {
23592359
? '&'
23602360
: $entity;
23612361

2362-
$last = count($this->token['attr']) - 1;
2362+
$last = count($this->token['attr'] ?? []) - 1;
23632363
$this->token['attr'][$last]['value'] .= $char;
23642364

23652365
/* Finally, switch back to the attribute value state that you
@@ -2377,7 +2377,7 @@ protected function emitToken($token, $checkStream = true, $dry = false) {
23772377
}
23782378
}
23792379
if($token['type'] === self::ENDTAG && !empty($token['attr'])) {
2380-
for ($i = 0; $i < count($token['attr']); $i++) {
2380+
for ($i = 0; $i < count($token['attr'] ?? []); $i++) {
23812381
$this->emitToken(array(
23822382
'type' => self::PARSEERROR,
23832383
'data' => 'attributes-in-end-tag'

0 commit comments

Comments
 (0)