-
Notifications
You must be signed in to change notification settings - Fork 67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[HTML] Unstable per-line lexing of substyled PHP keywords #288
Comments
With the following change in LexHTML.cxx: diff --git a/lexers/LexHTML.cxx b/lexers/LexHTML.cxx
index 51cac259..a9a96973 100644
--- a/lexers/LexHTML.cxx
+++ b/lexers/LexHTML.cxx
@@ -1190,12 +1190,13 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
if (isPHPScript && (startPos == 0)) {
initStyle = SCE_HPHP_DEFAULT;
}
+ printf("%d startPos=%zu, initStyle=%d\n", __LINE__, startPos, initStyle);
styler.StartAt(startPos);
std::string lastTag;
std::string prevWord;
PhpNumberState phpNumber;
std::string phpStringDelimiter;
- int StateToPrint = initStyle;
+ int StateToPrint = initStyle & 0xff;
int state = stateForPrintState(StateToPrint);
std::string makoBlockType;
int makoComment = 0;
@@ -1208,6 +1209,7 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
startPos = backLineStart;
}
state = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : SCE_H_DEFAULT;
+ printf("%d InTagState startPos=%zu, state=%d\n", __LINE__, startPos, state);
}
// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
if (isPHPStringState(state)) {
@@ -1218,6 +1220,7 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
}
if (startPos == 0)
state = SCE_H_DEFAULT;
+ printf("%d isPHPStringState startPos=%zu, state=%d\n", __LINE__, startPos, state);
}
styler.StartAt(startPos); SciTE output when adding a character, then deleting it: 1193 startPos=0, initStyle=0
1193 startPos=42, initStyle=119
1223 isPHPStringState startPos=3, state=198
1193 startPos=39, initStyle=-58 Negative |
PHP-288-11-15.patch @@ -1195,7 +1195,7 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
std::string prevWord;
PhpNumberState phpNumber;
std::string phpStringDelimiter;
- int StateToPrint = initStyle;
+ int StateToPrint = initStyle & 0xff;
int state = stateForPrintState(StateToPrint);
std::string makoBlockType;
int makoComment = 0;
@@ -1216,8 +1216,7 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
length++;
state = styler.StyleIndexAt(startPos);
}
- if (startPos == 0)
- state = SCE_H_DEFAULT;
+ state = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : SCE_H_DEFAULT;
}
styler.StartAt(startPos);
|
PHP-288-1116.patch
After these changes, diff --git a/lexers/LexHTML.cxx b/lexers/LexHTML.cxx
index 51cac259..b384368d 100644
--- a/lexers/LexHTML.cxx
+++ b/lexers/LexHTML.cxx
@@ -668,6 +668,10 @@ constexpr bool isPHPStringState(int state) noexcept {
(state == SCE_HPHP_COMPLEX_VARIABLE);
}
+constexpr bool StyleNeedsBacktrack(int state) noexcept {
+ return InTagState(state) || isPHPStringState(state);
+}
+
enum class AllowPHP : int {
None, // No PHP
PHP, // <?php and <?=
@@ -1190,7 +1194,6 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
if (isPHPScript && (startPos == 0)) {
initStyle = SCE_HPHP_DEFAULT;
}
- styler.StartAt(startPos);
std::string lastTag;
std::string prevWord;
PhpNumberState phpNumber;
@@ -1201,23 +1204,18 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int
int makoComment = 0;
std::string djangoBlockType;
// If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
- if (InTagState(state)) {
- while ((startPos > 0) && (InTagState(styler.StyleIndexAt(startPos - 1)))) {
+ // PHP string can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
+ if (StyleNeedsBacktrack(state)) {
+ while ((startPos > 0) && (StyleNeedsBacktrack(styler.StyleIndexAt(startPos - 1)))) {
const Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
length += startPos - backLineStart;
startPos = backLineStart;
}
- state = (startPos > 0) ? styler.StyleIndexAt(startPos - 1) : SCE_H_DEFAULT;
- }
- // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
- if (isPHPStringState(state)) {
- while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
- startPos--;
- length++;
- state = styler.StyleIndexAt(startPos);
+ if (startPos > 0) {
+ state = styler.StyleIndexAt(startPos - 1);
+ } else {
+ state = isPHPScript ? SCE_HPHP_DEFAULT : SCE_H_DEFAULT;
}
- if (startPos == 0)
- state = SCE_H_DEFAULT;
}
styler.StartAt(startPos); |
Merge the backtracking loops and only check line-end styles - don't exit from backtracking when a non-string state is found. Respect isPHPScript if backtracking reaches the start. Remove the redundant styler.StartAt(startPos);
Cross-posted from notepad-plus-plus/notepad-plus-plus#15801
Minimal reproduction
define a substyle for server-side PHP keywords, e.g.,
use a substyled keyword in a malformed PHP script, e.g., leave a trailing string:
open the script in SciTE, append at least one character to the file
The substyle will extend beyond the keyword until the end of the document:
Running
TestLexers
on the script also raises a "different per-line styles" error. The text was updated successfully, but these errors were encountered: