Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
keithamus committed Feb 4, 2024
1 parent 40faef6 commit 7837a35
Show file tree
Hide file tree
Showing 63 changed files with 1,041,242 additions and 1,663,685 deletions.
4 changes: 1 addition & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 10 additions & 11 deletions crates/hdx_ast/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ impl Span {
Self { start, end }
}

/// Creates an empty `Span` positioned at `start`.
///
/// Both `start` and `end` are set to the given offset.
#[inline]
pub const fn start(start: u32) -> Self {
    let end = start;
    Self { start, end }
}

/// Overwrites the `end` offset of this `Span` in place.
#[inline]
pub fn end(&mut self, end: u32) {
    self.end = end;
}

/// Creates a placeholder `Span` whose `start` and `end` are both `0`
/// (the default value of `u32`).
pub fn dummy() -> Self {
    Self::new(0, 0)
}
Expand All @@ -33,17 +43,6 @@ impl Span {
/// Returns the slice of `source_text` covered by this `Span`.
///
/// # Panics
///
/// Panics if `start..end` is not a valid byte range of `source_text`
/// (out of bounds, reversed, or not on UTF-8 character boundaries).
pub fn source_text<'a>(&self, source_text: &'a str) -> &'a str {
    let from = self.start as usize;
    let to = self.end as usize;
    &source_text[from..to]
}

/// Returns a `Span` that begins where `self` begins and stops where `span` begins.
///
/// ```text
/// ____             ____
/// self lorem ipsum span
/// ^^^^^^^^^^^^^^^^^
/// ```
pub fn until(&self, span: Self) -> Self {
    let start = self.start;
    let end = span.start;
    Self { start, end }
}
}

impl Display for Span {
Expand Down
20 changes: 15 additions & 5 deletions crates/hdx_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ mod token;
use std::{collections::VecDeque, str::Chars};

use oxc_allocator::Allocator;
pub use token::{Escaped, NumType, PairWise, Token};
pub use token::{NumType, PairWise, Token};

/// A snapshot of the lexer's state, as produced by `Lexer::checkpoint` and
/// also used for the entries of the lexer's lookahead queue.
#[derive(Debug, Clone)]
pub struct LexerCheckpoint<'a> {
// Character iterator over the source; cloned when a checkpoint is taken.
chars: Chars<'a>,
// The token held by the lexer at the time of the snapshot.
token: Token,
// Position recorded alongside the snapshot; initialised to 0 in `Lexer::new`.
// NOTE(review): presumably the offset at which the previous token ended — confirm against users of this field.
prev_pos: u32,
}

pub struct Lexer<'a> {
Expand All @@ -24,7 +25,7 @@ pub struct Lexer<'a> {
impl<'a> Lexer<'a> {
pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
let token = Token::default();
let current = LexerCheckpoint { chars: source.chars(), token };
let current = LexerCheckpoint { chars: source.chars(), token, prev_pos: 0 };
Self { allocator, source, current, lookahead: VecDeque::with_capacity(4) }
}

Expand All @@ -42,7 +43,11 @@ impl<'a> Lexer<'a> {
/// Creates a checkpoint storing the current lexer state.
/// Use `rewind` to restore the lexer to the state stored in the checkpoint.
pub fn checkpoint(&self) -> LexerCheckpoint<'a> {
LexerCheckpoint { chars: self.current.chars.clone(), token: self.current.token.clone() }
LexerCheckpoint {
prev_pos: self.current.prev_pos,
chars: self.current.chars.clone(),
token: self.current.token.clone(),
}
}

/// Rewinds the lexer to the same state as when the passed in `checkpoint` was created.
Expand Down Expand Up @@ -71,10 +76,15 @@ impl<'a> Lexer<'a> {
// `self.current = checkpoint`
self.current.token = Token::default();

let prev_pos = self.pos();

for _i in self.lookahead.len()..n {
let peeked = self.read_next_token();
self.lookahead
.push_back(LexerCheckpoint { chars: self.current.chars.clone(), token: peeked });
self.lookahead.push_back(LexerCheckpoint {
prev_pos,
chars: self.current.chars.clone(),
token: peeked,
});
}

self.current = checkpoint;
Expand Down
64 changes: 27 additions & 37 deletions crates/hdx_lexer/src/private.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use hdx_syntax::{
use crate::{
constants::{SINGLE_CHAR_TOKENS, SURROGATE_RANGE},
string_builder::AutoCow,
token::{Escaped, NumType, Token},
token::{NumType, Token},
Lexer,
};

Expand Down Expand Up @@ -57,14 +57,14 @@ impl<'a> Lexer<'a> {
if is_ident_start_sequence(c, self.nth(1), self.nth(2)) {
return self.consume_ident_like_token();
}
if self.is_number_start(c) {
if self.is_number_start() {
return self.consume_numeric_token();
}
Token::Delim(self.current.chars.next().unwrap())
}
// Dot or Plus
'.' | '+' => {
if self.is_number_start(c) {
if self.is_number_start() {
return self.consume_numeric_token();
}
Token::Delim(self.current.chars.next().unwrap())
Expand All @@ -83,6 +83,7 @@ impl<'a> Lexer<'a> {
// Hash / Pound Sign
'#' => {
if is_ident(self.nth(1)) || is_escape_sequence(self.nth(1), self.nth(2)) {
self.current.chars.next();
self.consume_hash_token()
} else {
Token::Delim(self.current.chars.next().unwrap())
Expand All @@ -91,8 +92,9 @@ impl<'a> Lexer<'a> {
// Commercial At
'@' => {
if is_ident_start_sequence(self.nth(1), self.nth(2), self.nth(3)) {
let (ident, escaped) = self.consume_ident_sequence();
return Token::AtKeyword(ident, escaped);
self.current.chars.next();
let ident = self.consume_ident_sequence();
return Token::AtKeyword(ident);
}
Token::Delim(self.current.chars.next().unwrap())
}
Expand Down Expand Up @@ -129,21 +131,19 @@ impl<'a> Lexer<'a> {
}
}

fn consume_ident_sequence(&mut self) -> (Atom, Escaped) {
fn consume_ident_sequence(&mut self) -> Atom {
let mut builder = AutoCow::new(self);
let mut escaped = Escaped::No;
loop {
let mut c = self.nth(0);
if is_ident(c) {
c = self.current.chars.next().unwrap();
builder.push_matching(c);
} else if is_escape_sequence(c, self.nth(1)) {
escaped = Escaped::Yes;
self.current.chars.next();
builder.force_allocation_without_current_ascii_char(self);
builder.push_different(self.consume_escape_sequence());
} else {
return (Atom::from(builder.finish(self)), escaped);
return Atom::from(builder.finish(self));
}
}
}
Expand All @@ -164,7 +164,6 @@ impl<'a> Lexer<'a> {
let mut builder = AutoCow::new(self);
builder.start = self.remaining();
builder.value = None;
let mut escaped = Escaped::No;
loop {
let c = self.current.chars.next().unwrap_or(EOF);
match c {
Expand Down Expand Up @@ -199,7 +198,6 @@ impl<'a> Lexer<'a> {
'\\' => {
if is_escape_sequence(c, self.nth(0)) {
builder.force_allocation_without_current_ascii_char(self);
escaped = Escaped::Yes;
let c = self.consume_escape_sequence();
builder.push_different(c);
} else {
Expand All @@ -212,7 +210,7 @@ impl<'a> Lexer<'a> {
}
}
}
Token::Url(Atom::from(builder.finish(self)), escaped)
Token::Url(Atom::from(builder.finish(self)))
}

fn consume_remnants_of_bad_url(&mut self) -> Token {
Expand Down Expand Up @@ -268,23 +266,19 @@ impl<'a> Lexer<'a> {
match self.nth(0) {
'%' => {
self.current.chars.next();
Token::Dimension(num_type, value, atom!("%"), Escaped::No)
Token::Dimension(num_type, value, atom!("%"))
}
c if is_ident_start_sequence(c, self.nth(1), self.nth(2)) => {
let (unit, escaped) = self.consume_ident_sequence();
Token::Dimension(num_type, value, unit, escaped)
let unit = self.consume_ident_sequence();
Token::Dimension(num_type, value, unit)
}
_ => Token::Number(num_type, value),
}
}

fn consume_hash_token(&mut self) -> Token {
let (ident, escaped) = self.consume_ident_sequence();
if ident.starts_with(is_ident_start) {
Token::HashId(ident, escaped)
} else {
Token::Hash(ident, escaped)
}
let ident = self.consume_ident_sequence();
if ident.starts_with(is_ident_start) { Token::HashId(ident) } else { Token::Hash(ident) }
}

fn consume_decimal_digits(&mut self) {
Expand All @@ -294,7 +288,7 @@ impl<'a> Lexer<'a> {
}

fn consume_ident_like_token(&mut self) -> Token {
let (ident, escaped) = self.consume_ident_sequence();
let ident = self.consume_ident_sequence();
if self.nth(0) == '(' {
self.current.chars.next();
if is_url_ident(&ident) {
Expand All @@ -310,30 +304,26 @@ impl<'a> Lexer<'a> {
return self.consume_url_sequence();
}
}
return Token::Function(ident, escaped);
return Token::Function(ident);
}
Token::Ident(ident, escaped)
Token::Ident(ident)
}

fn consume_string_token(&mut self) -> Token {
let delimiter = self.current.chars.next().unwrap();
let mut builder = AutoCow::new(self);
let mut escaped = Escaped::No;
loop {
match self.nth(0) {
c if is_newline(c) => {
return Token::BadString;
}
EOF => {
return Token::String(Atom::from(builder.finish(self)), escaped);
return Token::String(Atom::from(builder.finish(self)));
}
c @ ('"' | '\'') => {
self.current.chars.next();
if c == delimiter {
return Token::String(
Atom::from(builder.finish_without_push(self)),
escaped,
);
return Token::String(Atom::from(builder.finish_without_push(self)));
}
builder.push_matching(c);
}
Expand All @@ -342,13 +332,12 @@ impl<'a> Lexer<'a> {
builder.force_allocation_without_current_ascii_char(self);
match self.nth(0) {
EOF => {
return Token::String(Atom::from(builder.finish(self)), escaped);
return Token::String(Atom::from(builder.finish(self)));
}
p if is_newline(p) => {
self.current.chars.next();
}
p if is_escape_sequence(c, p) => {
escaped = Escaped::Yes;
builder.push_different(self.consume_escape_sequence());
}
_ => {
Expand All @@ -374,11 +363,12 @@ impl<'a> Lexer<'a> {
Token::Comment
}

fn is_number_start(&mut self, c: char) -> bool {
c.is_ascii_digit()
|| (is_sign(c) && self.nth(0).is_ascii_digit())
|| (is_sign(c) && self.nth(0) == '.' && self.nth(1).is_ascii_digit())
|| (c == '.' && self.nth(0).is_ascii_digit())
/// Reports whether the upcoming characters begin a number.
///
/// Only peeks at the input via `nth`; nothing is consumed. A number starts with:
/// - a digit,
/// - a sign (per `is_sign`) followed by a digit, or by `.` and a digit,
/// - a `.` followed by a digit.
fn is_number_start(&mut self) -> bool {
    let first = self.nth(0);
    if first.is_ascii_digit() {
        return true;
    }
    if is_sign(first) {
        let second = self.nth(1);
        // The third character is only inspected when the second is a dot.
        if second.is_ascii_digit() || (second == '.' && self.nth(2).is_ascii_digit()) {
            return true;
        }
    }
    first == '.' && self.nth(1).is_ascii_digit()
}

fn hex_digit(&mut self) -> Option<u32> {
Expand Down
1 change: 0 additions & 1 deletion crates/hdx_lexer/src/string_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ impl<'a> AutoCow<'a> {

// Push a char that matches lexer.chars().next()
pub fn push_matching(&mut self, c: char) {
println!("push_matching {}", c);
if let Some(text) = &mut self.value {
text.push(c);
}
Expand Down
Loading

0 comments on commit 7837a35

Please sign in to comment.