Skip to content

Commit

Permalink
Fix out-of-bound source code accesses in Lexer
Browse files Browse the repository at this point in the history
Signed-off-by: HyukWoo Park <[email protected]>
  • Loading branch information
clover2123 authored and ksh8281 committed Jul 16, 2024
1 parent bddd8a8 commit 6c0926c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 28 deletions.
32 changes: 21 additions & 11 deletions src/parser/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ void Scanner::scanPunctuator(Scanner::ScannerResult* token, char16_t ch)

case '.':
kind = Period;
if (this->peekChar() == '.' && this->sourceCharAt(this->index + 1) == '.') {
if (this->peekChar() == '.' && this->peekChar(this->index + 1) == '.') {
// Spread operator "..."
this->index += 2;
kind = PeriodPeriodPeriod;
Expand Down Expand Up @@ -2354,8 +2354,16 @@ void Scanner::lex(Scanner::ScannerResult* token)
}

if (isIdentifierStart(cp)) {
goto ScanID;
this->scanIdentifier(token, cp);
return;
}

// common cases : `(` and `)` and `;`
if (cp == 0x28 || cp == 0x29 || cp == 0x3B) {
this->scanPunctuator(token, cp);
return;
}

// String literal starts with single quote (U+0027) or double quote (U+0022).
if (cp == 0x27 || cp == 0x22) {
this->scanStringLiteral(token);
Expand All @@ -2364,8 +2372,12 @@ void Scanner::lex(Scanner::ScannerResult* token)

// Dot (.) U+002E can also start a floating-point number, hence the need
// to check the next character.
if (UNLIKELY(cp == 0x2E) && isDecimalDigit(this->sourceCharAt(this->index + 1))) {
this->scanNumericLiteral(token);
if (cp == 0x2E) {
if (UNLIKELY(isDecimalDigit(this->peekChar(this->index + 1)))) {
this->scanNumericLiteral(token);
return;
}
this->scanPunctuator(token, cp);
return;
}

Expand All @@ -2374,21 +2386,19 @@ void Scanner::lex(Scanner::ScannerResult* token)
return;
}

if (UNLIKELY(cp == '`')) {
if (UNLIKELY(cp == 0x60)) {
++this->index;
this->scanTemplate(token, true);
return;
}

// Possible identifier start in a surrogate pair.
if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF) && isIdentifierStart(this->codePointAt(this->index))) {
goto ScanID;
if (UNLIKELY(cp >= 0xD800 && cp < 0xDFFF && isIdentifierStart(this->codePointAt(this->index)))) {
this->scanIdentifier(token, cp);
return;
}

this->scanPunctuator(token, cp);
return;

ScanID:
this->scanIdentifier(token, cp);
return;
}
} // namespace Escargot
35 changes: 19 additions & 16 deletions src/parser/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ class Scanner {

// Returns the char16_t that sourceCodeAccessData holds at position `idx`.
// This accessor performs no bounds handling of its own beyond the assertion
// below, so callers are expected to pass an index smaller than this->length.
ALWAYS_INLINE char16_t sourceCharAt(const size_t idx) const
{
// Guard against out-of-bounds reads of the source buffer.
// NOTE(review): presumably ASSERT is compiled out in release builds - confirm.
ASSERT(idx < this->length);
return sourceCodeAccessData.charAt(idx);
}

Expand All @@ -597,20 +598,20 @@ class Scanner {
{
bool start = (this->index == 0);
while (LIKELY(!this->eof())) {
char16_t ch = this->sourceCharAt(this->index);
char16_t ch = this->peekCharWithoutEOF();

if (isWhiteSpace(ch)) {
++this->index;
} else if (isLineTerminator(ch)) {
++this->index;
if (ch == 0x0D && this->sourceCharAt(this->index) == 0x0A) {
if (ch == 0x0D && this->peekChar(this->index) == 0x0A) {
++this->index;
}
++this->lineNumber;
this->lineStart = this->index;
start = true;
} else if (ch == 0x2F) { // U+002F is '/'
ch = this->sourceCharAt(this->index + 1);
ch = this->peekChar(this->index + 1);
if (ch == 0x2F) {
this->index += 2;
this->skipSingleLineComment();
Expand All @@ -621,29 +622,24 @@ class Scanner {
} else {
break;
}
} else if (start && ch == 0x2D) { // U+002D is '-'
} else if (start && ch == 0x2D && !this->isModule) { // U+002D is '-'
// U+003E is '>'
if (!this->isModule && (this->sourceCharAt(this->index + 1) == 0x2D) && (this->sourceCharAt(this->index + 2) == 0x3E)) {
if ((this->peekChar(this->index + 1) == 0x2D) && (this->peekChar(this->index + 2) == 0x3E)) {
// '-->' is a single-line comment
this->index += 3;
this->skipSingleLineComment();
} else {
break;
}
} else if (ch == 0x3C) { // U+003C is '<'
if (!this->isModule && this->length > this->index + 4) {
if (this->sourceCharAt(this->index + 1) == '!'
&& this->sourceCharAt(this->index + 2) == '-'
&& this->sourceCharAt(this->index + 3) == '-') {
this->index += 4; // `<!--`
this->skipSingleLineComment();
} else {
break;
}
} else if (ch == 0x3C && !this->isModule) { // U+003C is '<'
if (this->peekChar(this->index + 1) == '!'
&& this->peekChar(this->index + 2) == '-'
&& this->peekChar(this->index + 3) == '-') {
this->index += 4; // `<!--`
this->skipSingleLineComment();
} else {
break;
}

} else {
break;
}
Expand Down Expand Up @@ -690,6 +686,7 @@ class Scanner {
private:
// Returns the character at the scanner's current position (this->index)
// without falling back to a default value at end of input. The caller must
// already know that the scanner is not at EOF; the assertion below checks that.
ALWAYS_INLINE char16_t peekCharWithoutEOF()
{
// Precondition: the current index must still be inside the source.
ASSERT(!this->eof());
return this->sourceCharAt(this->index);
}

Expand All @@ -698,6 +695,12 @@ class Scanner {
return UNLIKELY(this->eof()) ? 0 : this->sourceCharAt(this->index);
}

// Bounds-checked lookahead at an absolute position: yields the character
// stored at `idx`, or 0 when `idx` is at or beyond this->length.
ALWAYS_INLINE char16_t peekChar(size_t idx)
{
    if (UNLIKELY(idx >= this->length)) {
        // Past the end of the source: report 0 instead of touching the buffer.
        return 0;
    }
    return this->sourceCharAt(idx);
}

char32_t scanHexEscape(char prefix);
char32_t scanUnicodeCodePointEscape();

Expand Down
2 changes: 1 addition & 1 deletion test/vendortest

0 comments on commit 6c0926c

Please sign in to comment.