Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow subscript numbers in identifiers #516

Merged
merged 2 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ impl<'a> Formatter<'a> {
}

self.output
.push_str(&crate::parse::canonicalize_exclams(&binding.name.value));
.push_str(&crate::parse::canonicalize_ident(&binding.name.value));
self.output
.push_str(if binding.public { " ←" } else { " ↚" });
if binding.array_macro {
Expand Down
29 changes: 24 additions & 5 deletions src/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ use unicode_segmentation::UnicodeSegmentation;

use crate::{ast::PlaceholderOp, ArraySwizzle, Inputs, Primitive, StackSwizzle, WILDCARD_CHAR};

/// Subscript digit characters
pub const SUBSCRIPT_NUMS: [char; 10] = ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉'];

/// Lex a Uiua source file
pub fn lex(
input: &str,
Expand Down Expand Up @@ -793,7 +796,7 @@ impl<'a> Lexer<'a> {
"]" => self.end(CloseBracket, start),
"⟨" => self.end(OpenAngle, start),
"⟩" => self.end(CloseAngle, start),
"_" => self.end(Underscore, start),
"_" if self.peek_char() != Some("_") => self.end(Underscore, start),
"|" => self.end(Bar, start),
";" => self.end(Semicolon, start),
"-" if self.next_chars_exact(["-", "-"]) => self.end(TripleMinus, start),
Expand Down Expand Up @@ -1002,12 +1005,28 @@ impl<'a> Lexer<'a> {
}
}
// Identifiers and unformatted glyphs
c if is_custom_glyph(c) || c.chars().all(is_ident_char) || c == "&" => {
c if is_custom_glyph(c) || c.chars().all(is_ident_char) || c == "&" || c == "_" => {
let mut ident = c.to_string();
// Collect characters
if !is_custom_glyph(c) {
while let Some(c) = self.next_char_if_all(is_ident_char) {
ident.push_str(c);
// Handle identifiers beginning with __
if c == "_" && self.next_char_exact("_") {
ident.push_str("__");
while let Some(dc) = self.next_char_if_all(|c| c.is_ascii_digit()) {
ident.push_str(dc);
}
}
loop {
if let Some(c) = self.next_char_if_all(is_ident_char) {
ident.push_str(c);
} else if self.next_chars_exact(["_"; 2]) {
ident.push_str("__");
while let Some(dc) = self.next_char_if_all(|c| c.is_ascii_digit()) {
ident.push_str(dc);
}
} else {
break;
}
}
}
let mut exclam_count = 0;
Expand Down Expand Up @@ -1376,7 +1395,7 @@ fn parse_format_fragments(s: &str) -> Vec<String> {

/// Whether a character can be part of a Uiua identifier
pub fn is_ident_char(c: char) -> bool {
c.is_alphabetic() && !"ⁿₙπτηℂλ".contains(c)
c.is_alphabetic() && !"ⁿₙπτηℂλ".contains(c) || SUBSCRIPT_NUMS.contains(&c)
}

/// Whether a string is a custom glyph
Expand Down
29 changes: 25 additions & 4 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ impl<'i> Parser<'i> {
match token {
Token::Ident if line.is_some() => {
let line = line.as_mut().unwrap();
let ident = canonicalize_exclams(&self.input[span.byte_range()]);
let ident = canonicalize_ident(&self.input[span.byte_range()]);
let name = span.clone().sp(ident);
line.items.push(name);
}
Expand Down Expand Up @@ -523,8 +523,8 @@ impl<'i> Parser<'i> {
}
fn try_ident(&mut self) -> Option<Sp<Ident>> {
let span = self.try_exact(Token::Ident)?;
let s: Ident = canonicalize_exclams(&self.input[span.byte_range()]);
Some(span.sp(s))
let ident = canonicalize_ident(&self.input[span.byte_range()]);
Some(span.sp(ident))
}
fn try_ref(&mut self) -> Option<Sp<Word>> {
let mut checkpoint = self.index;
Expand Down Expand Up @@ -1348,11 +1348,32 @@ pub fn place_exclams(ident: &str, count: usize) -> Ident {
}

/// Rewrite the identifier with the same number of exclamation points using double and single exclamation point characters as needed
pub fn canonicalize_exclams(ident: &str) -> Ident {
fn canonicalize_exclams(ident: &str) -> Ident {
let num_margs = crate::parse::ident_modifier_args(ident);
place_exclams(ident, num_margs)
}

/// Convert the ASCII `__<digits>` spelling of a subscript into subscript glyphs.
///
/// Every `_` in `ident` is dropped and every ASCII decimal digit is replaced by
/// the entry of [`crate::lex::SUBSCRIPT_NUMS`] at that digit's value. All other
/// characters are passed through unchanged.
fn canonicalize_subscripts(ident: &str) -> Ident {
    // No syntax validation happens here: per the original author's note, the
    // stricter rules about where `__` and digits may appear are enforced by
    // the lexer before an identifier reaches this point.
    ident
        .chars()
        .filter_map(|ch| match ch {
            // Underscores only exist as the `__` subscript marker, so drop them.
            '_' => None,
            // `to_digit(10)` is `Some` only for '0'..='9', giving an index in 0..10.
            _ => Some(
                ch.to_digit(10)
                    .map_or(ch, |digit| crate::lex::SUBSCRIPT_NUMS[digit as usize]),
            ),
        })
        .collect()
}

/// Produce the canonical spelling of an identifier.
///
/// Applies `canonicalize_exclams` first and then feeds that result through
/// `canonicalize_subscripts`.
pub fn canonicalize_ident(ident: &str) -> Ident {
    let with_exclams = canonicalize_exclams(ident);
    canonicalize_subscripts(&with_exclams)
}

pub(crate) fn count_placeholders(words: &[Sp<Word>]) -> usize {
let mut count = 0;
for word in words {
Expand Down