Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Amazon Ion (#1017) #1947

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions lib/rouge/demos/ion
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Sample Ion document.
*/
{
key: "value",
'float': 1_234e-5,
"annotation": type:: null.string,
list: 'things':: ['symbol', nan, +inf, -inf, "other"],
sexp: (concat ('symbol' [list] "string" null)),
date: date:: 1970-01-01T00:00Z,
"long string": str:: (
'''
long strings are neat
'''
),
struct: {
nested: { value: int:: 12_345 },
},
blob: {{ SGVsbG8sIFdvcmxkIQ== }}, // Hello, World!
}
175 changes: 175 additions & 0 deletions lib/rouge/lexers/ion.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class Ion < RegexLexer
title 'Ion'
desc 'Amazon Ion (https://amazon-ion.github.io/ion-docs)'
tag 'ion'
filenames '*.ion'
mimetypes 'application/ion'

# Shared regex fragments used by the states below.

# Backslash escapes of the Ion text format: the single-character escapes
# (\0 \a \b \t \n \f \r \v \" \' \/ \? \\) plus \xHH, \uHHHH and \UHHHHHHHH.
escapes = %r{\\(?:[0abtnfrv"'/?\\]|x\h{2}|u\h{4}|U\h{8})}

# Quoted symbol on a single line. The body stops at the first unescaped
# single quote, so two quoted symbols on one line stay separate tokens.
# An unknown backslash sequence is still consumed (lenient highlighting),
# and the body may be empty ('').
q = %r{'(?:#{escapes}|\\[^\n\r]|[^'\\\n\r])*'}

# Short double-quoted string on a single line; same structure as `q`,
# and the empty string ("") is accepted.
qq = %r{"(?:#{escapes}|\\[^\n\r]|[^"\\\n\r])*"}

# Delimiter of a long (triple-quoted) string.
qqq = %r{'''}

# Unquoted identifier symbol: a letter, `_` or `$`, then letters, digits, `_` or `$`.
symbol = %r{[A-Za-z_\$](?:[0-9A-Za-z_\$])*}

# Short double-quoted strings: the whole literal, quotes included, is matched
# by the `qq` pattern and emitted as a single token.
state :qq do
rule qq, Literal::String::Double
end

# Body of a '''long string'''. The state is pushed by the rules that match the
# opening `'''` (see :quotes and :blob), so only the body and the closing
# delimiter are handled here.
state :qqq do
# closing delimiter: return to the state that opened the string
rule %r{'''}, Literal::String::Double, :pop!
# any run of characters other than a single quote (newlines included)
rule %r{[^']+}m, Literal::String::Double
# a single quote that did not start a closing delimiter
rule %r{'}, Literal::String::Double
end

# Text-like values: long strings, short strings, identifier symbols and
# quoted symbols.
state :quotes do
# the triple-quote rule is listed before the quoted-symbol rule so that `'''`
# opens a long string
rule qqq, Literal::String::Double, :qqq
mixin :qq
# unquoted identifier symbol
rule symbol, Literal::String::Symbol
# single-quoted symbol
rule q, Literal::String::Symbol
end

# Type annotations: a quoted or identifier symbol, optional whitespace, then `::`.
state :annotation do
# Characters allowed inside a quoted annotation: U+0020..U+FFFF except the
# single quote (U+0027) and the backslash (U+005C), plus tab, backspace,
# form feed and space.
annotation = %r{(?:[\u{0020}-\u{0026}]|[\u{0028}-\u{005B}]|[\u{005D}-\u{FFFF}]|[\t\b\f ])+}
rule %r{('#{annotation}'|#{symbol})(\s*)(::)} do
# one token per capture group: the symbol, the whitespace, the `::` operator
groups Name::Decorator, Text::Whitespace, Operator
end
end

# Block and line comments.
state :comments do
# /* ... */ may span several lines (non-greedy, `m` flag lets `.` match newlines)
rule %r{/\*.*?\*/}m, Comment::Multiline
# // up to the end of the line
rule %r{//.*?$}, Comment::Single
end

# Keyword constants. The trailing `\b` keeps longer identifiers that merely
# start with a keyword (the spec uses `truee` and `nullish`) from matching here.
state :constants do
# booleans
rule %r{(?:true|false)\b}, Name::Builtin
# untyped `null` or a typed null such as `null.string`
rule %r{null(?:\.(?:blob|bool|clob|decimal|float|int|list|null|sexp|string|struct|symbol|timestamp))?\b}, Name::Builtin
end

# Numeric literals.
# NOTE(review): the binary and hex rules do not accept a leading `-`, while the
# decimal `integer` pattern does — confirm against the Ion grammar.
state :numbers do
# binary integer with optional `_` digit separators
rule %r{0b[01]+(?:_[01]+)*\b}, Literal::Number::Bin
# hexadecimal integer with optional `_` digit separators
rule %r{0x\h+(?:_\h+)*\b}, Literal::Number::Hex
# special float values: nan, +inf, -inf
rule %r{(?:nan|[+-]inf)\b}, Literal::Number::Float

# optional minus, then either a lone 0 or a non-zero-leading digit run with
# optional `_`-separated groups
integer = %r{-?(?:0|[1-9]\d*(?:_\d+)*)}
# integer followed by `.`, `d` or `D`, optional further digits and an optional exponent
rule %r{#{integer}[.dD][+-]?(?:#{integer})*(?:[dDeE][+-]?#{integer})?}, Literal::Number::Float
# integer followed directly by an exponent, e.g. 1_234e-5
rule %r{#{integer}[dDeE][+-]?#{integer}}, Literal::Number::Float
# plain integer; listed last so the longer forms above are tried first
rule integer, Literal::Number::Integer
end

# Timestamp literals, assembled from smaller patterns.
state :timestamps do
# 0001..9999
year = %r{000[1-9]|00[1-9]\d|0[1-9]\d{2}|[1-9]\d{3}}
# 01..12
month = %r{0[1-9]|1[0-2]}
# 01..31 (not validated against the month)
day = %r{0[1-9]|[12]\d|3[01]}
date = %r{#{year}-#{month}-#{day}}

# 00..23
hour = %r{[01]\d|2[0-3]}
# 00..59
minute = %r{[0-5]\d}
# 00..59 with an optional fractional part
second = %r{[0-5]\d(?:\.\d+)?}
# `Z` or a signed hh:mm offset
offset = %r{Z|[+-]#{hour}:#{minute}}
# hh:mm, optional :ss, and a mandatory offset
time = %r{#{hour}:#{minute}(?::#{second})?#{offset}}

# either a full date with an optional `T` and optional time, or a year /
# year-month followed by a mandatory `T` (e.g. 1970T)
rule %r{#{date}(T#{time}?)?|#{year}(?:-#{month})?T}, Literal::Date
end

# Runs of whitespace, newlines included.
state :whitespace do
rule %r{\s+}, Text::Whitespace
end

# Contents of a `{{ ... }}` literal; pushed by the `{{` rule in :containers.
state :blob do
# closing delimiter
rule %r/}}/, Punctuation::Indicator, :pop!

# quoted text inside the double braces: long strings and short strings
rule qqq, Literal::String::Double, :qqq
mixin :qq
mixin :whitespace

# no attempt to validate the Base64 blob
rule %r{(?:[A-Za-z0-9/\+=]+)}, Literal
end

# Opening delimiters; each pushes the state that lexes the container body.
state :containers do
# `{{` is listed before `{` so it is not taken as a struct opener
rule %r/{{/, Punctuation::Indicator, :blob
rule %r{\[}, Punctuation::Indicator, :list
rule %r{\(}, Punctuation::Indicator, :sexp
rule %r/{/, Punctuation::Indicator, :struct
end

# Contents of a `[ ... ]` list; pushed by the `[` rule in :containers.
state :list do
# closing bracket: back to the enclosing state
rule %r{]}, Punctuation::Indicator, :pop!

# any value may appear as an element, including nested containers
mixin :containers
mixin :comments
mixin :annotation
mixin :whitespace
mixin :constants
mixin :timestamps
mixin :numbers
mixin :quotes

# separators between elements
rule %r{[,=;]}, Punctuation
end

# Contents of a `( ... )` s-expression; pushed by the `(` rule in :containers.
state :sexp do
  # closing parenthesis: back to the enclosing state
  rule %r{\)}, Punctuation::Indicator, :pop!

  # Values are tried before the operator rule. Comments (`//`, `/* */`),
  # signed numbers (`-1`, `+inf`) and `$`-prefixed symbols all begin with a
  # character that the operator pattern would otherwise consume first.
  mixin :containers
  mixin :comments
  mixin :annotation
  mixin :whitespace
  mixin :constants
  mixin :timestamps
  mixin :numbers
  mixin :quotes

  # Operator symbols: the two-character forms first, then any single
  # operator character. Kept last so it only catches what no value rule matched.
  rule %r{(?:\+\+|--|<<|>>|\&\&|\.\.|\|\||[-+\*/=<>|&$^.#!%?@`~])}, Operator
end

# Contents of a `{ ... }` struct; pushed by the `{` rule in :containers.
state :struct do
# closing brace: back to the enclosing state
rule %r/}/, Punctuation::Indicator, :pop!

# field name (quoted symbol, string or identifier), optional whitespace and
# the `:` separator; the field's value is then lexed in :value
rule %r{(#{q}|#{qq}|#{symbol})(\s*)(:)} do
groups Name::Label, Text::Whitespace, Punctuation
push :value
end

mixin :containers
mixin :comments
mixin :whitespace
end

# Value of a struct field; pushed by the field-name rule in :struct.
state :value do
mixin :containers
mixin :comments
mixin :whitespace
mixin :annotation
mixin :constants
mixin :timestamps
mixin :numbers
mixin :quotes

# a comma ends this value: back to :struct for the next field
rule %r{,}, Punctuation, :pop!

# a closing brace with no preceding comma ends the value and the enclosing
# struct, so two states are popped
rule %r/(})/ do
groups Punctuation::Indicator
pop!(2)
end
end

# Top level of an Ion document.
state :root do
# `$ion_1_0` / `$ion_symbol_table` system symbols.
# NOTE(review): `\s*` precedes `\A`, so it looks like it can only match the
# empty string — confirm whether leading whitespace was meant to be allowed.
rule %r{\s*\A\$(?:ion_1_0|ion_symbol_table)\b}, Name::Builtin::Pseudo

mixin :comments
mixin :annotation
mixin :constants
mixin :timestamps
mixin :numbers
mixin :containers
mixin :quotes
mixin :whitespace
end
end
end
end
199 changes: 199 additions & 0 deletions spec/lexers/ion_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Spec for the Ion lexer: lexer guessing plus one end-to-end tokenization check.
describe Rouge::Lexers::Ion do
let(:subject) { Rouge::Lexers::Ion.new }

# The lexer should be selected from a file name or a MIME type.
describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'file.ion'
end

it 'guesses by mimetype' do
assert_guess :mimetype => 'application/ion'
end
end

# Tokenizes a document that exercises annotations, every container kind,
# numbers, timestamps, constants and look-alike symbols (nancy, truee, nullish).
describe 'lexing' do
include Support::Lexing

it 'handles a basic file' do
ion = <<~ION
/**
* comment
*/
'annotation' :: {
a: 1,
'b': 2.0,
"c": float:: 1.23e-4,
"d": 'null':: null.symbol,
"e": 'timestamps':: [
1970-01-01T00:00Z,
1970T,
],
f: 'constants and such':: [
nancy, nan,
"+infinity", +inf, -inf,
truee, false, true,
null.null, null,
nullish,
],
"g": (add [0b101, 0xdeadbeef, -1_000] "hello"),
h: { i: '''string''', },
j: {{ aW9u }},
}
ION

# the sample must lex without producing any error token
assert_no_errors ion

# expected token stream, in document order
assert_tokens_equal ion,
["Comment.Multiline", "/**\n * comment\n */"],
["Text.Whitespace", "\n"],
["Name.Decorator", "'annotation'"],
["Text.Whitespace", " "],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{"],
["Text.Whitespace", "\n "],
# field a: integer
["Name.Label", "a"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.Number.Integer", "1"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field 'b': decimal
["Name.Label", "'b'"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.Number.Float", "2.0"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field "c": annotated float with exponent
["Name.Label", "\"c\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "float"],
["Operator", "::"],
["Text.Whitespace", " "],
["Literal.Number.Float", "1.23e-4"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field "d": quoted annotation and typed null
["Name.Label", "\"d\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'null'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Name.Builtin", "null.symbol"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field "e": list of timestamps
["Name.Label", "\"e\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'timestamps'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Text.Whitespace", "\n "],
["Literal.Date", "1970-01-01T00:00Z"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.Date", "1970T"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Punctuation.Indicator", "]"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field f: constants next to symbols that only start like a constant
["Name.Label", "f"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'constants and such'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "nancy"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "nan"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Double", "\"+infinity\""],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "+inf"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "-inf"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "truee"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "false"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "true"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Builtin", "null.null"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "null"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "nullish"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Punctuation.Indicator", "]"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field "g": s-expression holding a list of binary, hex and negative integers
["Name.Label", "\"g\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "("],
["Literal.String.Symbol", "add"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Literal.Number.Bin", "0b101"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Hex", "0xdeadbeef"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Integer", "-1_000"],
["Punctuation.Indicator", "]"],
["Text.Whitespace", " "],
["Literal.String.Double", "\"hello\""],
["Punctuation.Indicator", ")"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field h: nested struct with a long string and a trailing comma
["Name.Label", "h"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{"],
["Text.Whitespace", " "],
["Name.Label", "i"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.String.Double", "'''string'''"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Punctuation.Indicator", "}"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
# field j: blob
["Name.Label", "j"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{{"],
["Text.Whitespace", " "],
["Literal", "aW9u"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "}}"],
["Punctuation", ","],
["Text.Whitespace", "\n"],
["Punctuation.Indicator", "}"],
["Text.Whitespace", "\n"]
end
end
end
Loading