Skip to content

Commit

Permalink
Port Minecraft lexers from Pygments (#992)
Browse files Browse the repository at this point in the history
Ported the lexers for mcfunction and SNBT from Pygments using the
`pygments2chroma_xml.py` script.

While doing so, I found that `LiteralNumberByte` was missing from TokenType,
so I added the type and regenerated tokentype_enumer.go.
  • Loading branch information
kofuk authored Aug 22, 2024
1 parent 763c2ea commit 895a048
Show file tree
Hide file tree
Showing 8 changed files with 492 additions and 327 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ translators for Pygments lexers and styles.
| J | J, Java, JavaScript, JSON, Julia, Jungle |
| K | Kotlin |
| L | Lighttpd configuration file, LLVM, Lua |
| M | Makefile, Mako, markdown, Mason, Materialize SQL dialect, Mathematica, Matlab, mcfunction, Meson, Metal, MiniZinc, MLIR, Modula-2, MonkeyC, MorrowindScript, Myghty, MySQL |
| M | Makefile, Mako, markdown, Mason, Materialize SQL dialect, Mathematica, Matlab, MCFunction, Meson, Metal, MiniZinc, MLIR, Modula-2, MonkeyC, MorrowindScript, Myghty, MySQL |
| N | NASM, Natural, Newspeak, Nginx configuration file, Nim, Nix |
| O | Objective-C, OCaml, Octave, Odin, OnesEnterprise, OpenEdge ABL, OpenSCAD, Org Mode |
| P | PacmanConf, Perl, PHP, PHTML, Pig, PkgConfig, PL/pgSQL, plaintext, Plutus Core, Pony, PostgreSQL SQL dialect, PostScript, POVRay, PowerQuery, PowerShell, Prolog, PromQL, Promela, properties, Protocol Buffer, PRQL, PSL, Puppet, Python, Python 2 |
| Q | QBasic, QML |
| R | R, Racket, Ragel, Raku, react, ReasonML, reg, Rego, reStructuredText, Rexx, RPMSpec, Ruby, Rust |
| S | SAS, Sass, Scala, Scheme, Scilab, SCSS, Sed, Sieve, Smali, Smalltalk, Smarty, Snobol, Solidity, SourcePawn, SPARQL, SQL, SquidConf, Standard ML, stas, Stylus, Svelte, Swift, SYSTEMD, systemverilog |
| S | SAS, Sass, Scala, Scheme, Scilab, SCSS, Sed, Sieve, Smali, Smalltalk, Smarty, SNBT, Snobol, Solidity, SourcePawn, SPARQL, SQL, SquidConf, Standard ML, stas, Stylus, Svelte, Swift, SYSTEMD, systemverilog |
| T | TableGen, Tal, TASM, Tcl, Tcsh, Termcap, Terminfo, Terraform, TeX, Thrift, TOML, TradingView, Transact-SQL, Turing, Turtle, Twig, TypeScript, TypoScript, TypoScriptCssData, TypoScriptHtmlData |
| V | V, V shell, Vala, VB.net, verilog, VHDL, VHS, VimL, vue |
| W | WDTE, WebGPU Shading Language, Whiley |
Expand Down
300 changes: 128 additions & 172 deletions lexers/embedded/mcfunction.xml
Original file line number Diff line number Diff line change
@@ -1,182 +1,138 @@

<lexer>
<config>
<name>mcfunction</name>
<name>MCFunction</name>
<alias>mcfunction</alias>
<alias>mcf</alias>
<filename>*.mcfunction</filename>
<!-- dot_all and not_multiline are left at their defaults on purpose: the rules in this lexer anchor per line with ^ and $ (for example "^(\s*)([a-z_]+)" and "#.*$"), which requires multiline matching and a "." that stops at a newline. -->
<mime_type>text/mcfunction</mime_type>
</config>
<rules>
<state name="nbtobjectvalue">
<!-- Inside an NBT object: a double-quoted key (backslash escapes allowed) or a bare alphanumeric/underscore key is emitted as NameTag and pushes nbtobjectattribute; a closing brace pops one state. -->
<rule pattern="(&#34;(\\\\|\\&#34;|[^&#34;])*&#34;|[a-zA-Z0-9_]+)">
<token type="NameTag"/>
<push state="nbtobjectattribute"/>
</rule>
<rule pattern="\}">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="nbtarrayvalue">
<!-- Inside an NBT array: any nbtvalue is accepted, commas are Punctuation, and a closing square bracket pops one state. -->
<rule>
<include state="nbtvalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
</rule>
<rule pattern="\]">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="nbtvalue">
<!-- A single NBT value: the simplevalue rules are tried first; an opening brace pushes nbtobjectvalue and an opening square bracket pushes nbtarrayvalue. This state has no pop rule of its own; it is only pulled in through include. -->
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern="\{">
<token type="Punctuation"/>
<push state="nbtobjectvalue"/>
</rule>
<rule pattern="\[">
<token type="Punctuation"/>
<push state="nbtarrayvalue"/>
</rule>
</state>
<state name="argumentvalue">
<!-- Value following "name=" in an argument list (pushed from argumentlist). A comma pops one state; a closing brace or square bracket pops two states at once. -->
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
<rule pattern="[}\]]">
<token type="Punctuation"/>
<pop depth="2"/>
</rule>
</state>
<state name="argumentlist">
<!-- Argument list entered from root on an opening square bracket. Rule order matters: "nbt={" is tried before the generic "name={" form, which is tried before the plain "name=" form. -->
<rule pattern="(nbt)(={)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="nbtobjectvalue"/>
</rule>
<!-- "name={" re-enters this same state for the nested list. -->
<rule pattern="([A-Za-z0-9/_!]+)(={)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="argumentlist"/>
</rule>
<rule pattern="([A-Za-z0-9/_!]+)(=)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="argumentvalue"/>
</rule>
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
</rule>
<!-- Either closing bracket kind ends the list. -->
<rule pattern="[}\]]">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="root">
<!-- Entry state of the previous (hand-written) lexer. -->
<!-- A comment runs from "#" up to and including the next newline. -->
<rule pattern="#.*?\n">
<token type="CommentSingle"/>
</rule>
<!-- Fixed list of command words, with an optional leading slash and a trailing word boundary. -->
<rule pattern="/?(geteduclientinfo|clearspawnpoint|defaultgamemode|transferserver|toggledownfall|immutableworld|detectredstone|setidletimeout|playanimation|classroommode|spreadplayers|testforblocks|setmaxplayers|setworldspawn|testforblock|worldbuilder|createagent|worldborder|camerashake|advancement|raytracefog|locatebiome|tickingarea|replaceitem|attributes|spawnpoint|difficulty|experience|scoreboard|whitelist|structure|playsound|stopsound|forceload|spectate|gamerule|function|schedule|wsserver|teleport|position|save-off|particle|setblock|datapack|mobevent|transfer|gamemode|save-all|bossbar|enchant|trigger|collect|execute|weather|teammsg|tpagent|banlist|dropall|publish|tellraw|testfor|save-on|destroy|ability|locate|summon|remove|effect|reload|ban-ip|recipe|pardon|detect|music|clear|clone|event|mixer|debug|title|ride|stop|list|turn|data|team|kick|loot|tell|help|give|flog|fill|move|time|seed|kill|save|item|deop|code|tag|ban|msg|say|tp|me|op|xp|w|place)\b">
<token type="KeywordReserved"/>
</rule>
<!-- Target selectors: "@" followed by one of p, r, a, e, s, c, v. -->
<rule pattern="(@p|@r|@a|@e|@s|@c|@v)">
<token type="KeywordConstant"/>
</rule>
<rule pattern="\[">
<token type="Punctuation"/>
<push state="argumentlist"/>
</rule>
<rule pattern="{">
<token type="Punctuation"/>
<push state="nbtobjectvalue"/>
</rule>
<rule pattern="~">
<token type="NameBuiltin"/>
</rule>
<!-- Alphabetic word, optionally preceded by an alphabetic "prefix:" part. -->
<rule pattern="([a-zA-Z_]+:)?[a-zA-Z_]+\b">
<token type="Text"/>
</rule>
<!-- "word.number" split into three tokens. -->
<rule pattern="([a-z]+)(\.)([0-9]+)\b">
<bygroups>
<token type="Text"/>
<token type="Punctuation"/>
<token type="LiteralNumber"/>
</bygroups>
</rule>
<!-- NOTE(review): the single-character class is listed first, so the two-character alternatives can never be chosen. -->
<rule pattern="([&lt;&gt;=]|&lt;=|&gt;=)">
<token type="Punctuation"/>
</rule>
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern="\s+">
<token type="TextWhitespace"/>
</rule>
</state>
<state name="simplevalue">
<!-- Scalar values shared by the root, argument and NBT states through include. Order matters: floats and "a..b" ranges are tried before plain integers. -->
<rule pattern="(true|false)">
<token type="KeywordConstant"/>
</rule>
<!-- "0b" or "1b". -->
<rule pattern="[01]b">
<token type="LiteralNumber"/>
</rule>
<!-- Number with a fraction and/or an exponent. -->
<rule pattern="-?(0|[1-9]\d*)(\.\d+[eE](\+|-)?\d+|[eE](\+|-)?\d+|\.\d+)">
<token type="LiteralNumberFloat"/>
</rule>
<!-- Integer range "low..high", split into three tokens. -->
<rule pattern="(-?\d+)(\.\.)(-?\d+)">
<bygroups>
<token type="LiteralNumberInteger"/>
<token type="Punctuation"/>
<token type="LiteralNumberInteger"/>
</bygroups>
</rule>
<rule pattern="-?(0|[1-9]\d*)">
<token type="LiteralNumberInteger"/>
</rule>
<!-- Double-quoted string; an escaped backslash or escaped quote does not end it. -->
<rule pattern="&#34;(\\\\|\\&#34;|[^&#34;])*&#34;">
<token type="LiteralStringDouble"/>
</rule>
<!-- Single-quoted string; needs at least one character and has no escape handling. -->
<rule pattern="&#39;[^&#39;]+&#39;">
<token type="LiteralStringSingle"/>
</rule>
<!-- Fallback: a word, optionally prefixed with "!" or "#" (the prefix group may be empty). -->
<rule pattern="([!#]?)(\w+)">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
</bygroups>
</rule>
</state>
<state name="nbtobjectattribute">
<rule>
<include state="nbtvalue"/>
</rule>
<rule pattern=":">
<token type="Punctuation"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
<rule pattern="\}">
<token type="Punctuation"/>
<pop depth="2"/>
</rule>
<rule><include state="names"/></rule>
<rule><include state="comments"/></rule>
<rule><include state="literals"/></rule>
<rule><include state="whitespace"/></rule>
<rule><include state="property"/></rule>
<rule><include state="operators"/></rule>
<rule><include state="selectors"/></rule>
</state>
<state name="names">
  <!-- Name-like tokens, tried in this order: a lowercase word at a ^ anchor (optionally indented), a lowercase word following "run", five hyphen-separated hex groups, resource names, then general identifiers and identifiers starting with #, % or $. -->
  <rule pattern="^(\s*)([a-z_]+)">
    <bygroups>
      <token type="TextWhitespace"/>
      <token type="NameBuiltin"/>
    </bygroups>
  </rule>
  <rule pattern="(?&lt;=run)\s+[a-z_]+">
    <token type="NameBuiltin"/>
  </rule>
  <rule pattern="\b[0-9a-fA-F]+(?:-[0-9a-fA-F]+){4}\b">
    <token type="NameVariable"/>
  </rule>
  <rule>
    <include state="resource-name"/>
  </rule>
  <!-- Both identifier forms below need at least two characters. -->
  <rule pattern="[A-Za-z_][\w.#%$]+">
    <token type="KeywordConstant"/>
  </rule>
  <rule pattern="[#%$][\w.#%$]+">
    <token type="NameVariableMagic"/>
  </rule>
</state>
<state name="resource-name">
  <!-- Resource names, each optionally prefixed with "#": the "namespace:path" form is tried first, then a slash-separated path without a namespace. Both are NameFunction. -->
  <rule pattern="#?[a-z_][a-z_.-]*:[a-z0-9_./-]+">
    <token type="NameFunction"/>
  </rule>
  <rule pattern="#?[a-z0-9_\.\-]+\/[a-z0-9_\.\-\/]+">
    <token type="NameFunction"/>
  </rule>
</state>
<state name="whitespace">
  <!-- One or more whitespace characters form a single TextWhitespace token. -->
  <rule pattern="\s+">
    <token type="TextWhitespace"/>
  </rule>
</state>
<state name="comments">
  <!-- A "#" followed by ">" or "!" at a ^ anchor opens a block comment; the push names two states (comments.block, then comments.block.emphasized). Any other "#" starts a single-line comment that runs to the $ anchor. -->
  <rule pattern="^\s*(#[&gt;!])">
    <token type="CommentMultiline"/>
    <push state="comments.block" state="comments.block.emphasized"/>
  </rule>
  <rule pattern="#.*$">
    <token type="CommentSingle"/>
  </rule>
</state>
<state name="comments.block">
  <!-- Between lines of a block comment: another "#>" / "#!" line continues in emphasized mode, a plain "#" line continues in normal mode, and anything else pops this state. -->
  <rule pattern="^\s*#[&gt;!]">
    <token type="CommentMultiline"/>
    <push state="comments.block.emphasized"/>
  </rule>
  <rule pattern="^\s*#">
    <token type="CommentMultiline"/>
    <push state="comments.block.normal"/>
  </rule>
  <rule>
    <pop depth="1"/>
  </rule>
</state>
<state name="comments.block.normal">
  <!-- Body of a plain block-comment line: special tokens first, other non-space runs as CommentMultiline; a newline ends the line and pops one state. -->
  <rule>
    <include state="comments.block.special"/>
  </rule>
  <rule pattern="\S+">
    <token type="CommentMultiline"/>
  </rule>
  <rule pattern="\n">
    <token type="Text"/>
    <pop depth="1"/>
  </rule>
  <rule>
    <include state="whitespace"/>
  </rule>
</state>
<state name="comments.block.emphasized">
  <!-- Body of an emphasized block-comment line: same shape as the normal variant, but ordinary text is emitted as LiteralStringDoc. A newline pops one state. -->
  <rule>
    <include state="comments.block.special"/>
  </rule>
  <rule pattern="\S+">
    <token type="LiteralStringDoc"/>
  </rule>
  <rule pattern="\n">
    <token type="Text"/>
    <pop depth="1"/>
  </rule>
  <rule>
    <include state="whitespace"/>
  </rule>
</state>
<state name="comments.block.special">
  <!-- Tokens highlighted inside block comments: "@word" as NameDecorator, resource names, and identifiers starting with #, % or $. -->
  <rule pattern="@\S+">
    <token type="NameDecorator"/>
  </rule>
  <rule>
    <include state="resource-name"/>
  </rule>
  <rule pattern="[#%$][\w.#%$]+">
    <token type="NameVariableMagic"/>
  </rule>
</state>
<state name="operators">
  <!-- Any single character from the operator class is one Operator token. -->
  <rule pattern="[\-~%^?!+*&lt;&gt;\\/|&amp;=.]">
    <token type="Operator"/>
  </rule>
</state>
<state name="literals">
  <!-- Literal values: range dots, booleans, bare words, byte-suffixed digits, numbers, and the openers of quoted strings. -->
  <rule pattern="\.\."><token type="Literal"/></rule>
  <rule pattern="(true|false)"><token type="KeywordPseudo"/></rule>
  <rule pattern="[A-Za-z_]+"><token type="NameVariableClass"/></rule>
  <rule pattern="[0-7]b"><token type="LiteralNumberByte"/></rule>
  <!-- The integer rule must come before the float rule: the float pattern also matches plain digit runs, which previously left this rule unreachable. The negative lookahead hands "1.5" to the float rule while still accepting the "1" in "1..5". -->
  <rule pattern="[+-]?\d+\b(?!\.\d)"><token type="LiteralNumberInteger"/></rule>
  <rule pattern="[+-]?\d*\.?\d+([eE]?[+-]?\d+)?[df]?\b"><token type="LiteralNumberFloat"/></rule>
  <!-- String bodies are lexed in their own states so the two quote styles cannot interfere. -->
  <rule pattern="&quot;"><token type="LiteralStringDouble"/><push state="literals.string-double"/></rule>
  <rule pattern="&#x27;"><token type="LiteralStringSingle"/><push state="literals.string-single"/></rule>
</state>
<state name="literals.string-double">
  <!-- Inside a double-quoted string: backslash escapes, runs of ordinary characters (no backslash, quote or newline), and the closing quote, which pops one state. -->
  <rule pattern="\\.">
    <token type="LiteralStringEscape"/>
  </rule>
  <rule pattern="[^\\&quot;\n]+">
    <token type="LiteralStringDouble"/>
  </rule>
  <rule pattern="&quot;">
    <token type="LiteralStringDouble"/>
    <pop depth="1"/>
  </rule>
</state>
<state name="literals.string-single">
  <!-- Inside a single-quoted string: backslash escapes, runs of ordinary characters (no backslash, quote or newline), and the closing quote, which pops one state. -->
  <rule pattern="\\.">
    <token type="LiteralStringEscape"/>
  </rule>
  <rule pattern="[^\\&#x27;\n]+">
    <token type="LiteralStringSingle"/>
  </rule>
  <rule pattern="&#x27;">
    <token type="LiteralStringSingle"/>
    <pop depth="1"/>
  </rule>
</state>
<state name="selectors">
  <!-- "@" followed by exactly one lowercase letter is a NameVariable token. -->
  <rule pattern="@[a-z]">
    <token type="NameVariable"/>
  </rule>
</state>
<state name="property">
  <!-- Openers of bracketed property groups. Each push names two states: the matching container state, then property.key. -->
  <rule pattern="\{">
    <token type="Punctuation"/>
    <push state="property.curly" state="property.key"/>
  </rule>
  <rule pattern="\[">
    <token type="Punctuation"/>
    <push state="property.square" state="property.key"/>
  </rule>
</state>
<state name="property.curly">
  <!-- Container state for a brace group: skips whitespace, allows nested groups, and pops on the closing brace. -->
  <rule>
    <include state="whitespace"/>
  </rule>
  <rule>
    <include state="property"/>
  </rule>
  <rule pattern="\}">
    <token type="Punctuation"/>
    <pop depth="1"/>
  </rule>
</state>
<state name="property.square">
  <!-- Container state for a square-bracket group: skips whitespace, allows nested groups, pops on the closing bracket, and accepts commas between items. -->
  <rule>
    <include state="whitespace"/>
  </rule>
  <rule>
    <include state="property"/>
  </rule>
  <rule pattern="\]">
    <token type="Punctuation"/>
    <pop depth="1"/>
  </rule>
  <rule pattern=",">
    <token type="Punctuation"/>
  </rule>
</state>
<state name="property.key">
  <!-- Key position inside a property group. Every key form hands over to property.delimiter; when nothing matches, the state pops itself. -->
  <rule><include state="whitespace"/></rule>
  <!-- Namespaced resource name, only when an "=" follows. -->
  <rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+(?=\s*\=)"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
  <rule pattern="#?[a-z_][a-z0-9_\.\-/]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
  <rule pattern="[A-Za-z_\-\+]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
  <!-- Quoted keys: push the delimiter state first and the string state on top of it, so the key body (including escapes and characters such as ":") is lexed by property.key.string-double / property.key.string-single, and the closing quote returns to property.delimiter. Previously only property.delimiter was pushed, leaving the two string states unreachable. -->
  <rule pattern="&quot;"><token type="NameAttribute"/><push state="property.delimiter" state="property.key.string-double"/></rule>
  <rule pattern="&#x27;"><token type="NameAttribute"/><push state="property.delimiter" state="property.key.string-single"/></rule>
  <!-- Integer key. -->
  <rule pattern="-?\d+"><token type="LiteralNumberInteger"/><push state="property.delimiter"/></rule>
  <rule><pop depth="1"/></rule>
</state>
<state name="property.key.string-double">
  <!-- Body of a double-quoted key: escapes as LiteralStringEscape, ordinary characters and the closing quote as NameAttribute; the closing quote pops one state. -->
  <rule pattern="\\.">
    <token type="LiteralStringEscape"/>
  </rule>
  <rule pattern="[^\\&quot;\n]+">
    <token type="NameAttribute"/>
  </rule>
  <rule pattern="&quot;">
    <token type="NameAttribute"/>
    <pop depth="1"/>
  </rule>
</state>
<state name="property.key.string-single">
  <!-- Body of a single-quoted key: escapes as LiteralStringEscape, ordinary characters and the closing quote as NameAttribute; the closing quote pops one state. -->
  <rule pattern="\\.">
    <token type="LiteralStringEscape"/>
  </rule>
  <rule pattern="[^\\&#x27;\n]+">
    <token type="NameAttribute"/>
  </rule>
  <rule pattern="&#x27;">
    <token type="NameAttribute"/>
    <pop depth="1"/>
  </rule>
</state>
<state name="property.delimiter">
  <!-- After a key: ":" or "=" (optionally followed by "!") is Punctuation and pushes property.value; a comma is Punctuation and stays here; anything else pops one state. -->
  <rule>
    <include state="whitespace"/>
  </rule>
  <rule pattern="[:=]!?">
    <token type="Punctuation"/>
    <push state="property.value"/>
  </rule>
  <rule pattern=",">
    <token type="Punctuation"/>
  </rule>
  <rule>
    <pop depth="1"/>
  </rule>
</state>
<state name="property.value">
  <!-- Value position: resource-name shaped values are NameTag (namespaced form first), then literals and nested property groups; when nothing matches, the state pops itself. -->
  <rule>
    <include state="whitespace"/>
  </rule>
  <rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+">
    <token type="NameTag"/>
  </rule>
  <rule pattern="#?[a-z_][a-z0-9_\.\-/]+">
    <token type="NameTag"/>
  </rule>
  <rule>
    <include state="literals"/>
  </rule>
  <rule>
    <include state="property"/>
  </rule>
  <rule>
    <pop depth="1"/>
  </rule>
</state>
</rules>
</lexer>

58 changes: 58 additions & 0 deletions lexers/embedded/snbt.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

<lexer>
  <!-- Lexer for SNBT files (*.snbt). Text outside braces is plain Text; highlighting starts at the first opening brace. -->
  <config>
    <name>SNBT</name>
    <alias>snbt</alias>
    <filename>*.snbt</filename>
    <mime_type>text/snbt</mime_type>
  </config>
  <rules>
    <state name="root">
      <!-- Only an opening brace starts structured content; everything else up to it is Text. -->
      <rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
      <rule pattern="[^\{]+"><token type="Text"/></rule>
    </state>
    <state name="whitespace">
      <rule pattern="\s+"><token type="TextWhitespace"/></rule>
    </state>
    <state name="operators">
      <rule pattern="[,:;]"><token type="Punctuation"/></rule>
    </state>
    <state name="literals">
      <rule pattern="(true|false)"><token type="KeywordConstant"/></rule>
      <rule pattern="-?\d+[eE]-?\d+"><token type="LiteralNumberFloat"/></rule>
      <rule pattern="-?\d*\.\d+[fFdD]?"><token type="LiteralNumberFloat"/></rule>
      <!-- Digits with an f/d suffix are floats, consistent with the decimal rule above; they were previously swallowed by the integer rule. -->
      <rule pattern="-?\d+[fFdD]"><token type="LiteralNumberFloat"/></rule>
      <rule pattern="-?\d+[bBsSlL]?"><token type="LiteralNumberInteger"/></rule>
      <!-- String bodies are lexed in separate states so the two quote styles cannot interfere. -->
      <rule pattern="&quot;"><token type="LiteralStringDouble"/><push state="literals.string_double"/></rule>
      <rule pattern="&#x27;"><token type="LiteralStringSingle"/><push state="literals.string_single"/></rule>
    </state>
    <state name="literals.string_double">
      <rule pattern="\\."><token type="LiteralStringEscape"/></rule>
      <rule pattern="[^\\&quot;\n]+"><token type="LiteralStringDouble"/></rule>
      <rule pattern="&quot;"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
    </state>
    <state name="literals.string_single">
      <rule pattern="\\."><token type="LiteralStringEscape"/></rule>
      <rule pattern="[^\\&#x27;\n]+"><token type="LiteralStringSingle"/></rule>
      <rule pattern="&#x27;"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
    </state>
    <state name="compound">
      <!-- Booleans must be tried before the bare-word rule, which would otherwise consume them as NameAttribute. The lookahead keeps a key that happens to be spelled true/false (followed by ":") as a key. -->
      <rule pattern="(true|false)\b(?!\s*:)"><token type="KeywordConstant"/></rule>
      <!-- Unquoted keys and other bare words. -->
      <rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
      <rule><include state="operators"/></rule>
      <rule><include state="whitespace"/></rule>
      <rule><include state="literals"/></rule>
      <!-- A nested brace re-enters this same state (push without a state name). -->
      <rule pattern="\{"><token type="Punctuation"/><push/></rule>
      <rule pattern="\["><token type="Punctuation"/><push state="list"/></rule>
      <rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
    </state>
    <state name="list">
      <!-- Same ordering concern as in compound: booleans before bare words. -->
      <rule pattern="(true|false)\b"><token type="KeywordConstant"/></rule>
      <rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
      <rule><include state="literals"/></rule>
      <rule><include state="operators"/></rule>
      <rule><include state="whitespace"/></rule>
      <!-- A nested square bracket re-enters this same state (push without a state name). -->
      <rule pattern="\["><token type="Punctuation"/><push/></rule>
      <rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
      <rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
    </state>
  </rules>
</lexer>

Loading

0 comments on commit 895a048

Please sign in to comment.