Skip to content

Commit 47c9ed0

Browse files
authored
Consider 2-character EOL before line continuation (astral-sh#12035)
## Summary

This PR fixes a bug introduced in astral-sh#12008, which didn't consider the two-character newline (`\r\n`) that can follow the line continuation character.

For example, consider the following code, with the whitespace characters made visible:

```py
call(foo # comment \\r\n
\r\n
def bar():\r\n
....pass\r\n
```

The lexer is at `def` when it runs the re-lexing logic and tries to move back to a newline character. Walking backwards, it reaches the `\n` of the first line and treats it as not escaped (incorrect), because the character immediately before it is `\r` rather than a backslash; the `\r` itself is seen as escaped. As a result, the lexer is moved to the `\n` character, in the middle of the `\r\n` pair. This creates an overlap in token ranges, which causes the panic.

```
Name 0..4
Lpar 4..5
Name 5..8
Comment 9..20
NonLogicalNewline 20..22 <-- overlap between
Newline 21..22           <-- these two tokens
NonLogicalNewline 22..23
Def 23..26
...
```

fixes: astral-sh#12028

## Test Plan

Add a test case with a line continuation and Windows-style newline characters.
1 parent 7cb2619 commit 47c9ed0

File tree

4 files changed

+125
-17
lines changed

4 files changed

+125
-17
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf
88
crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf
99
crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf
1010

11+
crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py text eol=crlf
1112
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf
1213
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr
1314

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
call(a, b, # comment \
2+
3+
def bar():
4+
pass

crates/ruff_python_parser/src/lexer.rs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1393,26 +1393,40 @@ impl<'src> Lexer<'src> {
13931393
while let Some(ch) = reverse_chars.next() {
13941394
if is_python_whitespace(ch) {
13951395
current_position -= ch.text_len();
1396-
} else if matches!(ch, '\n' | '\r') {
1397-
current_position -= ch.text_len();
1398-
// Count the number of backslashes before the newline character.
1399-
let mut backslash_count = 0;
1400-
while reverse_chars.next_if_eq(&'\\').is_some() {
1401-
backslash_count += 1;
1402-
}
1403-
if backslash_count == 0 {
1404-
// No escapes: `\n`
1405-
newline_position = Some(current_position);
1406-
} else {
1407-
if backslash_count % 2 == 0 {
1408-
// Even number of backslashes i.e., all backslashes cancel each other out
1409-
// which means the newline character is not being escaped.
1410-
newline_position = Some(current_position);
1396+
continue;
1397+
}
1398+
1399+
match ch {
1400+
'\n' => {
1401+
current_position -= ch.text_len();
1402+
if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') {
1403+
current_position -= carriage_return.text_len();
14111404
}
1412-
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
14131405
}
1406+
'\r' => {
1407+
current_position -= ch.text_len();
1408+
}
1409+
_ => break,
1410+
}
1411+
1412+
debug_assert!(matches!(ch, '\n' | '\r'));
1413+
1414+
// Count the number of backslashes before the newline character.
1415+
let mut backslash_count = 0;
1416+
while reverse_chars.next_if_eq(&'\\').is_some() {
1417+
backslash_count += 1;
1418+
}
1419+
1420+
if backslash_count == 0 {
1421+
// No escapes: `\n`
1422+
newline_position = Some(current_position);
14141423
} else {
1415-
break;
1424+
if backslash_count % 2 == 0 {
1425+
// Even number of backslashes i.e., all backslashes cancel each other out
1426+
// which means the newline character is not being escaped.
1427+
newline_position = Some(current_position);
1428+
}
1429+
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
14161430
}
14171431
}
14181432

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
---
2+
source: crates/ruff_python_parser/tests/fixtures.rs
3+
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py
4+
---
5+
## AST
6+
7+
```
8+
Module(
9+
ModModule {
10+
range: 0..46,
11+
body: [
12+
Expr(
13+
StmtExpr {
14+
range: 0..10,
15+
value: Call(
16+
ExprCall {
17+
range: 0..10,
18+
func: Name(
19+
ExprName {
20+
range: 0..4,
21+
id: "call",
22+
ctx: Load,
23+
},
24+
),
25+
arguments: Arguments {
26+
range: 4..10,
27+
args: [
28+
Name(
29+
ExprName {
30+
range: 5..6,
31+
id: "a",
32+
ctx: Load,
33+
},
34+
),
35+
Name(
36+
ExprName {
37+
range: 8..9,
38+
id: "b",
39+
ctx: Load,
40+
},
41+
),
42+
],
43+
keywords: [],
44+
},
45+
},
46+
),
47+
},
48+
),
49+
FunctionDef(
50+
StmtFunctionDef {
51+
range: 26..46,
52+
is_async: false,
53+
decorator_list: [],
54+
name: Identifier {
55+
id: "bar",
56+
range: 30..33,
57+
},
58+
type_params: None,
59+
parameters: Parameters {
60+
range: 33..35,
61+
posonlyargs: [],
62+
args: [],
63+
vararg: None,
64+
kwonlyargs: [],
65+
kwarg: None,
66+
},
67+
returns: None,
68+
body: [
69+
Pass(
70+
StmtPass {
71+
range: 42..46,
72+
},
73+
),
74+
],
75+
},
76+
),
77+
],
78+
},
79+
)
80+
```
81+
## Errors
82+
83+
|
84+
1 | call(a, b, # comment \
85+
2 | /
86+
3 | | def bar():
87+
| |_^ Syntax Error: Expected ')', found newline
88+
4 | pass
89+
|

0 commit comments

Comments
 (0)